1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2017 Intel Corporation 3 */ 4 #include <stdlib.h> 5 #include <stdbool.h> 6 #include <netinet/in.h> 7 8 #include <rte_mbuf.h> 9 #include <rte_malloc.h> 10 #include <rte_ethdev_driver.h> 11 #include <rte_ethdev_vdev.h> 12 #include <rte_tcp.h> 13 #include <rte_udp.h> 14 #include <rte_ip.h> 15 #include <rte_ip_frag.h> 16 #include <rte_devargs.h> 17 #include <rte_kvargs.h> 18 #include <rte_bus_vdev.h> 19 #include <rte_alarm.h> 20 #include <rte_cycles.h> 21 #include <rte_string_fns.h> 22 23 #include "rte_eth_bond.h" 24 #include "eth_bond_private.h" 25 #include "eth_bond_8023ad_private.h" 26 27 #define REORDER_PERIOD_MS 10 28 #define DEFAULT_POLLING_INTERVAL_10_MS (10) 29 #define BOND_MAX_MAC_ADDRS 16 30 31 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port) 32 33 /* Table for statistics in mode 5 TLB */ 34 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS]; 35 36 static inline size_t 37 get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto) 38 { 39 size_t vlan_offset = 0; 40 41 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto || 42 rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) { 43 struct rte_vlan_hdr *vlan_hdr = 44 (struct rte_vlan_hdr *)(eth_hdr + 1); 45 46 vlan_offset = sizeof(struct rte_vlan_hdr); 47 *proto = vlan_hdr->eth_proto; 48 49 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) { 50 vlan_hdr = vlan_hdr + 1; 51 *proto = vlan_hdr->eth_proto; 52 vlan_offset += sizeof(struct rte_vlan_hdr); 53 } 54 } 55 return vlan_offset; 56 } 57 58 static uint16_t 59 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 60 { 61 struct bond_dev_private *internals; 62 63 uint16_t num_rx_total = 0; 64 uint16_t slave_count; 65 uint16_t active_slave; 66 int i; 67 68 /* Cast to structure, containing bonded device's port id and queue id */ 69 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; 70 internals = bd_rx_q->dev_private; 71 slave_count = internals->active_slave_count; 72 active_slave = internals->active_slave; 73 74 for (i = 0; i < slave_count && nb_pkts; i++) { 75 uint16_t num_rx_slave; 76 77 /* Offset of pointer to *bufs increases as packets are received 78 * from other slaves */ 79 num_rx_slave = 80 rte_eth_rx_burst(internals->active_slaves[active_slave], 81 bd_rx_q->queue_id, 82 bufs + num_rx_total, nb_pkts); 83 num_rx_total += num_rx_slave; 84 nb_pkts -= num_rx_slave; 85 if (++active_slave == slave_count) 86 active_slave = 0; 87 } 88 89 if (++internals->active_slave >= slave_count) 90 internals->active_slave = 0; 91 return num_rx_total; 92 } 93 94 static uint16_t 95 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs, 96 uint16_t nb_pkts) 97 { 98 struct bond_dev_private *internals; 99 100 /* Cast to structure, containing bonded device's port id and queue id */ 101 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; 102 103 internals = bd_rx_q->dev_private; 104 105 return rte_eth_rx_burst(internals->current_primary_port, 106 bd_rx_q->queue_id, bufs, nb_pkts); 107 } 108 109 static inline uint8_t 110 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf) 111 { 112 const uint16_t ether_type_slow_be = 113 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW); 114 115 return !((mbuf->ol_flags & PKT_RX_VLAN) ? 
mbuf->vlan_tci : 0) && 116 (ethertype == ether_type_slow_be && 117 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP)); 118 } 119 120 /***************************************************************************** 121 * Flow director's setup for mode 4 optimization 122 */ 123 124 static struct rte_flow_item_eth flow_item_eth_type_8023ad = { 125 .dst.addr_bytes = { 0 }, 126 .src.addr_bytes = { 0 }, 127 .type = RTE_BE16(RTE_ETHER_TYPE_SLOW), 128 }; 129 130 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = { 131 .dst.addr_bytes = { 0 }, 132 .src.addr_bytes = { 0 }, 133 .type = 0xFFFF, 134 }; 135 136 static struct rte_flow_item flow_item_8023ad[] = { 137 { 138 .type = RTE_FLOW_ITEM_TYPE_ETH, 139 .spec = &flow_item_eth_type_8023ad, 140 .last = NULL, 141 .mask = &flow_item_eth_mask_type_8023ad, 142 }, 143 { 144 .type = RTE_FLOW_ITEM_TYPE_END, 145 .spec = NULL, 146 .last = NULL, 147 .mask = NULL, 148 } 149 }; 150 151 const struct rte_flow_attr flow_attr_8023ad = { 152 .group = 0, 153 .priority = 0, 154 .ingress = 1, 155 .egress = 0, 156 .reserved = 0, 157 }; 158 159 int 160 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev, 161 uint16_t slave_port) { 162 struct rte_eth_dev_info slave_info; 163 struct rte_flow_error error; 164 struct bond_dev_private *internals = bond_dev->data->dev_private; 165 166 const struct rte_flow_action_queue lacp_queue_conf = { 167 .index = 0, 168 }; 169 170 const struct rte_flow_action actions[] = { 171 { 172 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 173 .conf = &lacp_queue_conf 174 }, 175 { 176 .type = RTE_FLOW_ACTION_TYPE_END, 177 } 178 }; 179 180 int ret = rte_flow_validate(slave_port, &flow_attr_8023ad, 181 flow_item_8023ad, actions, &error); 182 if (ret < 0) { 183 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)", 184 __func__, error.message, slave_port, 185 internals->mode4.dedicated_queues.rx_qid); 186 return -1; 187 } 188 189 ret = rte_eth_dev_info_get(slave_port, &slave_info); 190 if (ret != 0) { 191 RTE_BOND_LOG(ERR, 192 "%s: Error during getting device (port %u) info: %s\n", 193 __func__, slave_port, strerror(-ret)); 194 195 return ret; 196 } 197 198 if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues || 199 slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) { 200 RTE_BOND_LOG(ERR, 201 "%s: Slave %d capabilities doesn't allow to allocate additional queues", 202 __func__, slave_port); 203 return -1; 204 } 205 206 return 0; 207 } 208 209 int 210 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) { 211 struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id]; 212 struct bond_dev_private *internals = bond_dev->data->dev_private; 213 struct rte_eth_dev_info bond_info; 214 uint16_t idx; 215 int ret; 216 217 /* Verify if all slaves in bonding supports flow director and */ 218 if (internals->slave_count > 0) { 219 ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info); 220 if (ret != 0) { 221 RTE_BOND_LOG(ERR, 222 "%s: Error during getting device (port %u) info: %s\n", 223 __func__, bond_dev->data->port_id, 224 strerror(-ret)); 225 226 return ret; 227 } 228 229 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues; 230 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues; 231 232 for (idx = 0; idx < internals->slave_count; idx++) { 233 if (bond_ethdev_8023ad_flow_verify(bond_dev, 234 internals->slaves[idx].port_id) != 0) 235 return -1; 236 } 237 } 238 239 return 0; 240 } 241 242 int 243 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) { 244 245 
struct rte_flow_error error; 246 struct bond_dev_private *internals = bond_dev->data->dev_private; 247 struct rte_flow_action_queue lacp_queue_conf = { 248 .index = internals->mode4.dedicated_queues.rx_qid, 249 }; 250 251 const struct rte_flow_action actions[] = { 252 { 253 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 254 .conf = &lacp_queue_conf 255 }, 256 { 257 .type = RTE_FLOW_ACTION_TYPE_END, 258 } 259 }; 260 261 internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port, 262 &flow_attr_8023ad, flow_item_8023ad, actions, &error); 263 if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) { 264 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s " 265 "(slave_port=%d queue_id=%d)", 266 error.message, slave_port, 267 internals->mode4.dedicated_queues.rx_qid); 268 return -1; 269 } 270 271 return 0; 272 } 273 274 static inline uint16_t 275 rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, 276 bool dedicated_rxq) 277 { 278 /* Cast to structure, containing bonded device's port id and queue id */ 279 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; 280 struct bond_dev_private *internals = bd_rx_q->dev_private; 281 struct rte_eth_dev *bonded_eth_dev = 282 &rte_eth_devices[internals->port_id]; 283 struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs; 284 struct rte_ether_hdr *hdr; 285 286 const uint16_t ether_type_slow_be = 287 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW); 288 uint16_t num_rx_total = 0; /* Total number of received packets */ 289 uint16_t slaves[RTE_MAX_ETHPORTS]; 290 uint16_t slave_count, idx; 291 292 uint8_t collecting; /* current slave collecting status */ 293 const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id); 294 const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id); 295 uint8_t subtype; 296 uint16_t i; 297 uint16_t j; 298 uint16_t k; 299 300 /* Copy slave list to protect against slave up/down changes during tx 301 * bursting */ 302 slave_count = internals->active_slave_count; 303 memcpy(slaves, internals->active_slaves, 304 sizeof(internals->active_slaves[0]) * slave_count); 305 306 idx = internals->active_slave; 307 if (idx >= slave_count) { 308 internals->active_slave = 0; 309 idx = 0; 310 } 311 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) { 312 j = num_rx_total; 313 collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]], 314 COLLECTING); 315 316 /* Read packets from this slave */ 317 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id, 318 &bufs[num_rx_total], nb_pkts - num_rx_total); 319 320 for (k = j; k < 2 && k < num_rx_total; k++) 321 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *)); 322 323 /* Handle slow protocol packets. 
*/ 324 while (j < num_rx_total) { 325 if (j + 3 < num_rx_total) 326 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); 327 328 hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *); 329 subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype; 330 331 /* Remove packet from array if: 332 * - it is slow packet but no dedicated rxq is present, 333 * - slave is not in collecting state, 334 * - bonding interface is not in promiscuous mode: 335 * - packet is unicast and address does not match, 336 * - packet is multicast and bonding interface 337 * is not in allmulti, 338 */ 339 if (unlikely( 340 (!dedicated_rxq && 341 is_lacp_packets(hdr->ether_type, subtype, 342 bufs[j])) || 343 !collecting || 344 (!promisc && 345 ((rte_is_unicast_ether_addr(&hdr->d_addr) && 346 !rte_is_same_ether_addr(bond_mac, 347 &hdr->d_addr)) || 348 (!allmulti && 349 rte_is_multicast_ether_addr(&hdr->d_addr)))))) { 350 351 if (hdr->ether_type == ether_type_slow_be) { 352 bond_mode_8023ad_handle_slow_pkt( 353 internals, slaves[idx], bufs[j]); 354 } else 355 rte_pktmbuf_free(bufs[j]); 356 357 /* Packet is managed by mode 4 or dropped, shift the array */ 358 num_rx_total--; 359 if (j < num_rx_total) { 360 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) * 361 (num_rx_total - j)); 362 } 363 } else 364 j++; 365 } 366 if (unlikely(++idx == slave_count)) 367 idx = 0; 368 } 369 370 if (++internals->active_slave >= slave_count) 371 internals->active_slave = 0; 372 373 return num_rx_total; 374 } 375 376 static uint16_t 377 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, 378 uint16_t nb_pkts) 379 { 380 return rx_burst_8023ad(queue, bufs, nb_pkts, false); 381 } 382 383 static uint16_t 384 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, 385 uint16_t nb_pkts) 386 { 387 return rx_burst_8023ad(queue, bufs, nb_pkts, true); 388 } 389 390 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) 391 uint32_t burstnumberRX; 392 uint32_t burstnumberTX; 393 394 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB 395 396 static void 397 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len) 398 { 399 switch (arp_op) { 400 case RTE_ARP_OP_REQUEST: 401 strlcpy(buf, "ARP Request", buf_len); 402 return; 403 case RTE_ARP_OP_REPLY: 404 strlcpy(buf, "ARP Reply", buf_len); 405 return; 406 case RTE_ARP_OP_REVREQUEST: 407 strlcpy(buf, "Reverse ARP Request", buf_len); 408 return; 409 case RTE_ARP_OP_REVREPLY: 410 strlcpy(buf, "Reverse ARP Reply", buf_len); 411 return; 412 case RTE_ARP_OP_INVREQUEST: 413 strlcpy(buf, "Peer Identify Request", buf_len); 414 return; 415 case RTE_ARP_OP_INVREPLY: 416 strlcpy(buf, "Peer Identify Reply", buf_len); 417 return; 418 default: 419 break; 420 } 421 strlcpy(buf, "Unknown", buf_len); 422 return; 423 } 424 #endif 425 #define MaxIPv4String 16 426 static void 427 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size) 428 { 429 uint32_t ipv4_addr; 430 431 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr); 432 snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF, 433 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF, 434 ipv4_addr & 0xFF); 435 } 436 437 #define MAX_CLIENTS_NUMBER 128 438 uint8_t active_clients; 439 struct client_stats_t { 440 uint16_t port; 441 uint32_t ipv4_addr; 442 uint32_t ipv4_rx_packets; 443 uint32_t ipv4_tx_packets; 444 }; 445 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER]; 446 447 static void 448 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator) 449 { 450 int i = 0; 451 452 for (; i < 
MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update its stats. */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;

}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
		info,							\
		port,							\
		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
		src_ip,							\
		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
		dst_ip,							\
		arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __rte_unused *info,
	struct rte_ether_hdr *eth_h, uint16_t port,
	uint32_t __rte_unused *burstnumber)
{
	struct rte_ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct rte_arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
		ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
		arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
				ArpOp, sizeof(ArpOp));
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif
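/*
 * Minimal illustrative sketch of how the debug helpers above fit together;
 * it is not used by the driver itself.  The 10.0.0.1 address and port 0 are
 * arbitrary example values: update_client_stats() keys its table on the
 * (IPv4 address, port) pair and bumps the RX or TX counter depending on
 * which burst counter is passed in.
 */
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
static __rte_unused void
mode6_debug_example(void)
{
	uint32_t be_ip = rte_cpu_to_be_32(RTE_IPV4(10, 0, 0, 1));
	char ip_str[MaxIPv4String];

	/* Convert the big-endian address to dotted-quad form: "10.0.0.1" */
	ipv4_addr_to_dot(be_ip, ip_str, MaxIPv4String);

	/* Account one transmitted packet against that client on port 0 */
	update_client_stats(be_ip, 0, &burstnumberTX);
}
#endif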
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct rte_ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slaves mbuf with which packets are to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
				       &slave_bufs[i][num_tx_slave],
				       tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct
rte_ipv6_hdr *ipv6_hdr) 667 { 668 unaligned_uint32_t *word_src_addr = 669 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]); 670 unaligned_uint32_t *word_dst_addr = 671 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]); 672 673 return (word_src_addr[0] ^ word_dst_addr[0]) ^ 674 (word_src_addr[1] ^ word_dst_addr[1]) ^ 675 (word_src_addr[2] ^ word_dst_addr[2]) ^ 676 (word_src_addr[3] ^ word_dst_addr[3]); 677 } 678 679 680 void 681 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts, 682 uint16_t slave_count, uint16_t *slaves) 683 { 684 struct rte_ether_hdr *eth_hdr; 685 uint32_t hash; 686 int i; 687 688 for (i = 0; i < nb_pkts; i++) { 689 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *); 690 691 hash = ether_hash(eth_hdr); 692 693 slaves[i] = (hash ^= hash >> 8) % slave_count; 694 } 695 } 696 697 void 698 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts, 699 uint16_t slave_count, uint16_t *slaves) 700 { 701 uint16_t i; 702 struct rte_ether_hdr *eth_hdr; 703 uint16_t proto; 704 size_t vlan_offset; 705 uint32_t hash, l3hash; 706 707 for (i = 0; i < nb_pkts; i++) { 708 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *); 709 l3hash = 0; 710 711 proto = eth_hdr->ether_type; 712 hash = ether_hash(eth_hdr); 713 714 vlan_offset = get_vlan_offset(eth_hdr, &proto); 715 716 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) { 717 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *) 718 ((char *)(eth_hdr + 1) + vlan_offset); 719 l3hash = ipv4_hash(ipv4_hdr); 720 721 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) { 722 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *) 723 ((char *)(eth_hdr + 1) + vlan_offset); 724 l3hash = ipv6_hash(ipv6_hdr); 725 } 726 727 hash = hash ^ l3hash; 728 hash ^= hash >> 16; 729 hash ^= hash >> 8; 730 731 slaves[i] = hash % slave_count; 732 } 733 } 734 735 void 736 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts, 737 uint16_t slave_count, uint16_t *slaves) 738 { 739 struct rte_ether_hdr *eth_hdr; 740 uint16_t proto; 741 size_t vlan_offset; 742 int i; 743 744 struct rte_udp_hdr *udp_hdr; 745 struct rte_tcp_hdr *tcp_hdr; 746 uint32_t hash, l3hash, l4hash; 747 748 for (i = 0; i < nb_pkts; i++) { 749 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *); 750 size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]); 751 proto = eth_hdr->ether_type; 752 vlan_offset = get_vlan_offset(eth_hdr, &proto); 753 l3hash = 0; 754 l4hash = 0; 755 756 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) { 757 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *) 758 ((char *)(eth_hdr + 1) + vlan_offset); 759 size_t ip_hdr_offset; 760 761 l3hash = ipv4_hash(ipv4_hdr); 762 763 /* there is no L4 header in fragmented packet */ 764 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) 765 == 0)) { 766 ip_hdr_offset = (ipv4_hdr->version_ihl 767 & RTE_IPV4_HDR_IHL_MASK) * 768 RTE_IPV4_IHL_MULTIPLIER; 769 770 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) { 771 tcp_hdr = (struct rte_tcp_hdr *) 772 ((char *)ipv4_hdr + 773 ip_hdr_offset); 774 if ((size_t)tcp_hdr + sizeof(*tcp_hdr) 775 < pkt_end) 776 l4hash = HASH_L4_PORTS(tcp_hdr); 777 } else if (ipv4_hdr->next_proto_id == 778 IPPROTO_UDP) { 779 udp_hdr = (struct rte_udp_hdr *) 780 ((char *)ipv4_hdr + 781 ip_hdr_offset); 782 if ((size_t)udp_hdr + sizeof(*udp_hdr) 783 < pkt_end) 784 l4hash = HASH_L4_PORTS(udp_hdr); 785 } 786 } 787 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) { 788 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *) 789 ((char *)(eth_hdr + 1) + 
vlan_offset); 790 l3hash = ipv6_hash(ipv6_hdr); 791 792 if (ipv6_hdr->proto == IPPROTO_TCP) { 793 tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1); 794 l4hash = HASH_L4_PORTS(tcp_hdr); 795 } else if (ipv6_hdr->proto == IPPROTO_UDP) { 796 udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1); 797 l4hash = HASH_L4_PORTS(udp_hdr); 798 } 799 } 800 801 hash = l3hash ^ l4hash; 802 hash ^= hash >> 16; 803 hash ^= hash >> 8; 804 805 slaves[i] = hash % slave_count; 806 } 807 } 808 809 struct bwg_slave { 810 uint64_t bwg_left_int; 811 uint64_t bwg_left_remainder; 812 uint16_t slave; 813 }; 814 815 void 816 bond_tlb_activate_slave(struct bond_dev_private *internals) { 817 int i; 818 819 for (i = 0; i < internals->active_slave_count; i++) { 820 tlb_last_obytets[internals->active_slaves[i]] = 0; 821 } 822 } 823 824 static int 825 bandwidth_cmp(const void *a, const void *b) 826 { 827 const struct bwg_slave *bwg_a = a; 828 const struct bwg_slave *bwg_b = b; 829 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int; 830 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder - 831 (int64_t)bwg_a->bwg_left_remainder; 832 if (diff > 0) 833 return 1; 834 else if (diff < 0) 835 return -1; 836 else if (diff2 > 0) 837 return 1; 838 else if (diff2 < 0) 839 return -1; 840 else 841 return 0; 842 } 843 844 static void 845 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx, 846 struct bwg_slave *bwg_slave) 847 { 848 struct rte_eth_link link_status; 849 int ret; 850 851 ret = rte_eth_link_get_nowait(port_id, &link_status); 852 if (ret < 0) { 853 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s", 854 port_id, rte_strerror(-ret)); 855 return; 856 } 857 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8; 858 if (link_bwg == 0) 859 return; 860 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS; 861 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg; 862 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg; 863 } 864 865 static void 866 bond_ethdev_update_tlb_slave_cb(void *arg) 867 { 868 struct bond_dev_private *internals = arg; 869 struct rte_eth_stats slave_stats; 870 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS]; 871 uint16_t slave_count; 872 uint64_t tx_bytes; 873 874 uint8_t update_stats = 0; 875 uint16_t slave_id; 876 uint16_t i; 877 878 internals->slave_update_idx++; 879 880 881 if (internals->slave_update_idx >= REORDER_PERIOD_MS) 882 update_stats = 1; 883 884 for (i = 0; i < internals->active_slave_count; i++) { 885 slave_id = internals->active_slaves[i]; 886 rte_eth_stats_get(slave_id, &slave_stats); 887 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id]; 888 bandwidth_left(slave_id, tx_bytes, 889 internals->slave_update_idx, &bwg_array[i]); 890 bwg_array[i].slave = slave_id; 891 892 if (update_stats) { 893 tlb_last_obytets[slave_id] = slave_stats.obytes; 894 } 895 } 896 897 if (update_stats == 1) 898 internals->slave_update_idx = 0; 899 900 slave_count = i; 901 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp); 902 for (i = 0; i < slave_count; i++) 903 internals->tlb_slaves_order[i] = bwg_array[i].slave; 904 905 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb, 906 (struct bond_dev_private *)internals); 907 } 908 909 static uint16_t 910 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 911 { 912 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 913 struct bond_dev_private *internals = bd_tx_q->dev_private; 914 915 struct rte_eth_dev *primary_port = 916 
&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct rte_ether_hdr *ether_hdr;
	struct rte_ether_addr primary_slave_addr;
	struct rte_ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);


	rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j],
					struct rte_ether_hdr *);
			if (rte_is_same_ether_addr(&ether_hdr->s_addr,
					&primary_slave_addr))
				rte_ether_addr_copy(&active_slave_addr,
						&ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
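/*
 * Mode 6 (ALB) transmit path, implemented below: ARP frames are assigned a
 * slave by the ALB client table (bond_mode_alb_arp_xmit()) and have their
 * source MAC rewritten to that slave's address, while all other traffic is
 * sent with the TLB policy above.  When the client table is flagged as
 * "need to transmit" (mode6.ntt), ARP update packets are generated from the
 * table and sent on the slave owning each client.  Unsent ARP/TLB packets
 * are copied back to the tail of bufs[] for the caller; unsent update
 * packets are freed.
 */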
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_BOND_LOG(ERR,
						     "Failed to allocate ARP packet from pool");
					continue;
				}
				pkt_size = sizeof(struct rte_ether_hdr) +
					sizeof(struct rte_arp_hdr) +
					client_info->vlan_count *
					sizeof(struct rte_vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
						struct rte_ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
						struct rte_ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i,
&burstnumberTX); 1105 } 1106 #endif 1107 } 1108 } 1109 1110 /* Send non-ARP packets using tlb policy */ 1111 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) { 1112 num_send = bond_ethdev_tx_burst_tlb(queue, 1113 slave_bufs[RTE_MAX_ETHPORTS], 1114 slave_bufs_pkts[RTE_MAX_ETHPORTS]); 1115 1116 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) { 1117 bufs[nb_pkts - 1 - num_not_send - j] = 1118 slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j]; 1119 } 1120 1121 num_tx_total += num_send; 1122 } 1123 1124 return num_tx_total; 1125 } 1126 1127 static inline uint16_t 1128 tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs, 1129 uint16_t *slave_port_ids, uint16_t slave_count) 1130 { 1131 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 1132 struct bond_dev_private *internals = bd_tx_q->dev_private; 1133 1134 /* Array to sort mbufs for transmission on each slave into */ 1135 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; 1136 /* Number of mbufs for transmission on each slave */ 1137 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; 1138 /* Mapping array generated by hash function to map mbufs to slaves */ 1139 uint16_t bufs_slave_port_idxs[nb_bufs]; 1140 1141 uint16_t slave_tx_count; 1142 uint16_t total_tx_count = 0, total_tx_fail_count = 0; 1143 1144 uint16_t i; 1145 1146 /* 1147 * Populate slaves mbuf with the packets which are to be sent on it 1148 * selecting output slave using hash based on xmit policy 1149 */ 1150 internals->burst_xmit_hash(bufs, nb_bufs, slave_count, 1151 bufs_slave_port_idxs); 1152 1153 for (i = 0; i < nb_bufs; i++) { 1154 /* Populate slave mbuf arrays with mbufs for that slave. */ 1155 uint16_t slave_idx = bufs_slave_port_idxs[i]; 1156 1157 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; 1158 } 1159 1160 /* Send packet burst on each slave device */ 1161 for (i = 0; i < slave_count; i++) { 1162 if (slave_nb_bufs[i] == 0) 1163 continue; 1164 1165 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], 1166 bd_tx_q->queue_id, slave_bufs[i], 1167 slave_nb_bufs[i]); 1168 1169 total_tx_count += slave_tx_count; 1170 1171 /* If tx burst fails move packets to end of bufs */ 1172 if (unlikely(slave_tx_count < slave_nb_bufs[i])) { 1173 int slave_tx_fail_count = slave_nb_bufs[i] - 1174 slave_tx_count; 1175 total_tx_fail_count += slave_tx_fail_count; 1176 memcpy(&bufs[nb_bufs - total_tx_fail_count], 1177 &slave_bufs[i][slave_tx_count], 1178 slave_tx_fail_count * sizeof(bufs[0])); 1179 } 1180 } 1181 1182 return total_tx_count; 1183 } 1184 1185 static uint16_t 1186 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, 1187 uint16_t nb_bufs) 1188 { 1189 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 1190 struct bond_dev_private *internals = bd_tx_q->dev_private; 1191 1192 uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; 1193 uint16_t slave_count; 1194 1195 if (unlikely(nb_bufs == 0)) 1196 return 0; 1197 1198 /* Copy slave list to protect against slave up/down changes during tx 1199 * bursting 1200 */ 1201 slave_count = internals->active_slave_count; 1202 if (unlikely(slave_count < 1)) 1203 return 0; 1204 1205 memcpy(slave_port_ids, internals->active_slaves, 1206 sizeof(slave_port_ids[0]) * slave_count); 1207 return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids, 1208 slave_count); 1209 } 1210 1211 static inline uint16_t 1212 tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs, 1213 bool dedicated_txq) 1214 { 1215 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 1216 struct 
bond_dev_private *internals = bd_tx_q->dev_private; 1217 1218 uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; 1219 uint16_t slave_count; 1220 1221 uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS]; 1222 uint16_t dist_slave_count; 1223 1224 uint16_t slave_tx_count; 1225 1226 uint16_t i; 1227 1228 /* Copy slave list to protect against slave up/down changes during tx 1229 * bursting */ 1230 slave_count = internals->active_slave_count; 1231 if (unlikely(slave_count < 1)) 1232 return 0; 1233 1234 memcpy(slave_port_ids, internals->active_slaves, 1235 sizeof(slave_port_ids[0]) * slave_count); 1236 1237 if (dedicated_txq) 1238 goto skip_tx_ring; 1239 1240 /* Check for LACP control packets and send if available */ 1241 for (i = 0; i < slave_count; i++) { 1242 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]]; 1243 struct rte_mbuf *ctrl_pkt = NULL; 1244 1245 if (likely(rte_ring_empty(port->tx_ring))) 1246 continue; 1247 1248 if (rte_ring_dequeue(port->tx_ring, 1249 (void **)&ctrl_pkt) != -ENOENT) { 1250 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], 1251 bd_tx_q->queue_id, &ctrl_pkt, 1); 1252 /* 1253 * re-enqueue LAG control plane packets to buffering 1254 * ring if transmission fails so the packet isn't lost. 1255 */ 1256 if (slave_tx_count != 1) 1257 rte_ring_enqueue(port->tx_ring, ctrl_pkt); 1258 } 1259 } 1260 1261 skip_tx_ring: 1262 if (unlikely(nb_bufs == 0)) 1263 return 0; 1264 1265 dist_slave_count = 0; 1266 for (i = 0; i < slave_count; i++) { 1267 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]]; 1268 1269 if (ACTOR_STATE(port, DISTRIBUTING)) 1270 dist_slave_port_ids[dist_slave_count++] = 1271 slave_port_ids[i]; 1272 } 1273 1274 if (unlikely(dist_slave_count < 1)) 1275 return 0; 1276 1277 return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids, 1278 dist_slave_count); 1279 } 1280 1281 static uint16_t 1282 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, 1283 uint16_t nb_bufs) 1284 { 1285 return tx_burst_8023ad(queue, bufs, nb_bufs, false); 1286 } 1287 1288 static uint16_t 1289 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, 1290 uint16_t nb_bufs) 1291 { 1292 return tx_burst_8023ad(queue, bufs, nb_bufs, true); 1293 } 1294 1295 static uint16_t 1296 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs, 1297 uint16_t nb_pkts) 1298 { 1299 struct bond_dev_private *internals; 1300 struct bond_tx_queue *bd_tx_q; 1301 1302 uint16_t slaves[RTE_MAX_ETHPORTS]; 1303 uint8_t tx_failed_flag = 0; 1304 uint16_t num_of_slaves; 1305 1306 uint16_t max_nb_of_tx_pkts = 0; 1307 1308 int slave_tx_total[RTE_MAX_ETHPORTS]; 1309 int i, most_successful_tx_slave = -1; 1310 1311 bd_tx_q = (struct bond_tx_queue *)queue; 1312 internals = bd_tx_q->dev_private; 1313 1314 /* Copy slave list to protect against slave up/down changes during tx 1315 * bursting */ 1316 num_of_slaves = internals->active_slave_count; 1317 memcpy(slaves, internals->active_slaves, 1318 sizeof(internals->active_slaves[0]) * num_of_slaves); 1319 1320 if (num_of_slaves < 1) 1321 return 0; 1322 1323 /* Increment reference count on mbufs */ 1324 for (i = 0; i < nb_pkts; i++) 1325 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1); 1326 1327 /* Transmit burst on each active slave */ 1328 for (i = 0; i < num_of_slaves; i++) { 1329 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, 1330 bufs, nb_pkts); 1331 1332 if (unlikely(slave_tx_total[i] < nb_pkts)) 1333 tx_failed_flag = 1; 1334 1335 /* record the value and slave index for the slave which 
transmits the 1336 * maximum number of packets */ 1337 if (slave_tx_total[i] > max_nb_of_tx_pkts) { 1338 max_nb_of_tx_pkts = slave_tx_total[i]; 1339 most_successful_tx_slave = i; 1340 } 1341 } 1342 1343 /* if slaves fail to transmit packets from burst, the calling application 1344 * is not expected to know about multiple references to packets so we must 1345 * handle failures of all packets except those of the most successful slave 1346 */ 1347 if (unlikely(tx_failed_flag)) 1348 for (i = 0; i < num_of_slaves; i++) 1349 if (i != most_successful_tx_slave) 1350 while (slave_tx_total[i] < nb_pkts) 1351 rte_pktmbuf_free(bufs[slave_tx_total[i]++]); 1352 1353 return max_nb_of_tx_pkts; 1354 } 1355 1356 static void 1357 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link) 1358 { 1359 struct bond_dev_private *bond_ctx = ethdev->data->dev_private; 1360 1361 if (bond_ctx->mode == BONDING_MODE_8023AD) { 1362 /** 1363 * If in mode 4 then save the link properties of the first 1364 * slave, all subsequent slaves must match these properties 1365 */ 1366 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link; 1367 1368 bond_link->link_autoneg = slave_link->link_autoneg; 1369 bond_link->link_duplex = slave_link->link_duplex; 1370 bond_link->link_speed = slave_link->link_speed; 1371 } else { 1372 /** 1373 * In any other mode the link properties are set to default 1374 * values of AUTONEG/DUPLEX 1375 */ 1376 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG; 1377 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX; 1378 } 1379 } 1380 1381 static int 1382 link_properties_valid(struct rte_eth_dev *ethdev, 1383 struct rte_eth_link *slave_link) 1384 { 1385 struct bond_dev_private *bond_ctx = ethdev->data->dev_private; 1386 1387 if (bond_ctx->mode == BONDING_MODE_8023AD) { 1388 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link; 1389 1390 if (bond_link->link_duplex != slave_link->link_duplex || 1391 bond_link->link_autoneg != slave_link->link_autoneg || 1392 bond_link->link_speed != slave_link->link_speed) 1393 return -1; 1394 } 1395 1396 return 0; 1397 } 1398 1399 int 1400 mac_address_get(struct rte_eth_dev *eth_dev, 1401 struct rte_ether_addr *dst_mac_addr) 1402 { 1403 struct rte_ether_addr *mac_addr; 1404 1405 if (eth_dev == NULL) { 1406 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); 1407 return -1; 1408 } 1409 1410 if (dst_mac_addr == NULL) { 1411 RTE_BOND_LOG(ERR, "NULL pointer MAC specified"); 1412 return -1; 1413 } 1414 1415 mac_addr = eth_dev->data->mac_addrs; 1416 1417 rte_ether_addr_copy(mac_addr, dst_mac_addr); 1418 return 0; 1419 } 1420 1421 int 1422 mac_address_set(struct rte_eth_dev *eth_dev, 1423 struct rte_ether_addr *new_mac_addr) 1424 { 1425 struct rte_ether_addr *mac_addr; 1426 1427 if (eth_dev == NULL) { 1428 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); 1429 return -1; 1430 } 1431 1432 if (new_mac_addr == NULL) { 1433 RTE_BOND_LOG(ERR, "NULL pointer MAC specified"); 1434 return -1; 1435 } 1436 1437 mac_addr = eth_dev->data->mac_addrs; 1438 1439 /* If new MAC is different to current MAC then update */ 1440 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0) 1441 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr)); 1442 1443 return 0; 1444 } 1445 1446 static const struct rte_ether_addr null_mac_addr; 1447 1448 /* 1449 * Add additional MAC addresses to the slave 1450 */ 1451 int 1452 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev, 1453 uint16_t slave_port_id) 1454 { 1455 int i, ret; 1456 struct rte_ether_addr 
*mac_addr; 1457 1458 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) { 1459 mac_addr = &bonded_eth_dev->data->mac_addrs[i]; 1460 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr)) 1461 break; 1462 1463 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0); 1464 if (ret < 0) { 1465 /* rollback */ 1466 for (i--; i > 0; i--) 1467 rte_eth_dev_mac_addr_remove(slave_port_id, 1468 &bonded_eth_dev->data->mac_addrs[i]); 1469 return ret; 1470 } 1471 } 1472 1473 return 0; 1474 } 1475 1476 /* 1477 * Remove additional MAC addresses from the slave 1478 */ 1479 int 1480 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev, 1481 uint16_t slave_port_id) 1482 { 1483 int i, rc, ret; 1484 struct rte_ether_addr *mac_addr; 1485 1486 rc = 0; 1487 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) { 1488 mac_addr = &bonded_eth_dev->data->mac_addrs[i]; 1489 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr)) 1490 break; 1491 1492 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr); 1493 /* save only the first error */ 1494 if (ret < 0 && rc == 0) 1495 rc = ret; 1496 } 1497 1498 return rc; 1499 } 1500 1501 int 1502 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) 1503 { 1504 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; 1505 bool set; 1506 int i; 1507 1508 /* Update slave devices MAC addresses */ 1509 if (internals->slave_count < 1) 1510 return -1; 1511 1512 switch (internals->mode) { 1513 case BONDING_MODE_ROUND_ROBIN: 1514 case BONDING_MODE_BALANCE: 1515 case BONDING_MODE_BROADCAST: 1516 for (i = 0; i < internals->slave_count; i++) { 1517 if (rte_eth_dev_default_mac_addr_set( 1518 internals->slaves[i].port_id, 1519 bonded_eth_dev->data->mac_addrs)) { 1520 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", 1521 internals->slaves[i].port_id); 1522 return -1; 1523 } 1524 } 1525 break; 1526 case BONDING_MODE_8023AD: 1527 bond_mode_8023ad_mac_address_update(bonded_eth_dev); 1528 break; 1529 case BONDING_MODE_ACTIVE_BACKUP: 1530 case BONDING_MODE_TLB: 1531 case BONDING_MODE_ALB: 1532 default: 1533 set = true; 1534 for (i = 0; i < internals->slave_count; i++) { 1535 if (internals->slaves[i].port_id == 1536 internals->current_primary_port) { 1537 if (rte_eth_dev_default_mac_addr_set( 1538 internals->current_primary_port, 1539 bonded_eth_dev->data->mac_addrs)) { 1540 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", 1541 internals->current_primary_port); 1542 set = false; 1543 } 1544 } else { 1545 if (rte_eth_dev_default_mac_addr_set( 1546 internals->slaves[i].port_id, 1547 &internals->slaves[i].persisted_mac_addr)) { 1548 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", 1549 internals->slaves[i].port_id); 1550 } 1551 } 1552 } 1553 if (!set) 1554 return -1; 1555 } 1556 1557 return 0; 1558 } 1559 1560 int 1561 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode) 1562 { 1563 struct bond_dev_private *internals; 1564 1565 internals = eth_dev->data->dev_private; 1566 1567 switch (mode) { 1568 case BONDING_MODE_ROUND_ROBIN: 1569 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin; 1570 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; 1571 break; 1572 case BONDING_MODE_ACTIVE_BACKUP: 1573 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup; 1574 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup; 1575 break; 1576 case BONDING_MODE_BALANCE: 1577 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance; 1578 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; 1579 break; 1580 case BONDING_MODE_BROADCAST: 1581 eth_dev->tx_pkt_burst = 
bond_ethdev_tx_burst_broadcast; 1582 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; 1583 break; 1584 case BONDING_MODE_8023AD: 1585 if (bond_mode_8023ad_enable(eth_dev) != 0) 1586 return -1; 1587 1588 if (internals->mode4.dedicated_queues.enabled == 0) { 1589 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad; 1590 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad; 1591 RTE_BOND_LOG(WARNING, 1592 "Using mode 4, it is necessary to do TX burst " 1593 "and RX burst at least every 100ms."); 1594 } else { 1595 /* Use flow director's optimization */ 1596 eth_dev->rx_pkt_burst = 1597 bond_ethdev_rx_burst_8023ad_fast_queue; 1598 eth_dev->tx_pkt_burst = 1599 bond_ethdev_tx_burst_8023ad_fast_queue; 1600 } 1601 break; 1602 case BONDING_MODE_TLB: 1603 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb; 1604 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup; 1605 break; 1606 case BONDING_MODE_ALB: 1607 if (bond_mode_alb_enable(eth_dev) != 0) 1608 return -1; 1609 1610 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb; 1611 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb; 1612 break; 1613 default: 1614 return -1; 1615 } 1616 1617 internals->mode = mode; 1618 1619 return 0; 1620 } 1621 1622 1623 static int 1624 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev, 1625 struct rte_eth_dev *slave_eth_dev) 1626 { 1627 int errval = 0; 1628 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; 1629 struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id]; 1630 1631 if (port->slow_pool == NULL) { 1632 char mem_name[256]; 1633 int slave_id = slave_eth_dev->data->port_id; 1634 1635 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool", 1636 slave_id); 1637 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191, 1638 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE, 1639 slave_eth_dev->data->numa_node); 1640 1641 /* Any memory allocation failure in initialization is critical because 1642 * resources can't be free, so reinitialization is impossible. 
*/ 1643 if (port->slow_pool == NULL) { 1644 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n", 1645 slave_id, mem_name, rte_strerror(rte_errno)); 1646 } 1647 } 1648 1649 if (internals->mode4.dedicated_queues.enabled == 1) { 1650 /* Configure slow Rx queue */ 1651 1652 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, 1653 internals->mode4.dedicated_queues.rx_qid, 128, 1654 rte_eth_dev_socket_id(slave_eth_dev->data->port_id), 1655 NULL, port->slow_pool); 1656 if (errval != 0) { 1657 RTE_BOND_LOG(ERR, 1658 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)", 1659 slave_eth_dev->data->port_id, 1660 internals->mode4.dedicated_queues.rx_qid, 1661 errval); 1662 return errval; 1663 } 1664 1665 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, 1666 internals->mode4.dedicated_queues.tx_qid, 512, 1667 rte_eth_dev_socket_id(slave_eth_dev->data->port_id), 1668 NULL); 1669 if (errval != 0) { 1670 RTE_BOND_LOG(ERR, 1671 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", 1672 slave_eth_dev->data->port_id, 1673 internals->mode4.dedicated_queues.tx_qid, 1674 errval); 1675 return errval; 1676 } 1677 } 1678 return 0; 1679 } 1680 1681 int 1682 slave_configure(struct rte_eth_dev *bonded_eth_dev, 1683 struct rte_eth_dev *slave_eth_dev) 1684 { 1685 struct bond_rx_queue *bd_rx_q; 1686 struct bond_tx_queue *bd_tx_q; 1687 uint16_t nb_rx_queues; 1688 uint16_t nb_tx_queues; 1689 1690 int errval; 1691 uint16_t q_id; 1692 struct rte_flow_error flow_error; 1693 1694 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; 1695 1696 /* Stop slave */ 1697 rte_eth_dev_stop(slave_eth_dev->data->port_id); 1698 1699 /* Enable interrupts on slave device if supported */ 1700 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) 1701 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1; 1702 1703 /* If RSS is enabled for bonding, try to enable it for slaves */ 1704 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) { 1705 if (internals->rss_key_len != 0) { 1706 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 1707 internals->rss_key_len; 1708 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = 1709 internals->rss_key; 1710 } else { 1711 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL; 1712 } 1713 1714 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = 1715 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 1716 slave_eth_dev->data->dev_conf.rxmode.mq_mode = 1717 bonded_eth_dev->data->dev_conf.rxmode.mq_mode; 1718 } 1719 1720 if (bonded_eth_dev->data->dev_conf.rxmode.offloads & 1721 DEV_RX_OFFLOAD_VLAN_FILTER) 1722 slave_eth_dev->data->dev_conf.rxmode.offloads |= 1723 DEV_RX_OFFLOAD_VLAN_FILTER; 1724 else 1725 slave_eth_dev->data->dev_conf.rxmode.offloads &= 1726 ~DEV_RX_OFFLOAD_VLAN_FILTER; 1727 1728 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues; 1729 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues; 1730 1731 if (internals->mode == BONDING_MODE_8023AD) { 1732 if (internals->mode4.dedicated_queues.enabled == 1) { 1733 nb_rx_queues++; 1734 nb_tx_queues++; 1735 } 1736 } 1737 1738 errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id, 1739 bonded_eth_dev->data->mtu); 1740 if (errval != 0 && errval != -ENOTSUP) { 1741 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)", 1742 slave_eth_dev->data->port_id, errval); 1743 return errval; 1744 } 1745 1746 /* Configure device */ 1747 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id, 1748 nb_rx_queues, nb_tx_queues, 1749 
&(slave_eth_dev->data->dev_conf)); 1750 if (errval != 0) { 1751 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)", 1752 slave_eth_dev->data->port_id, errval); 1753 return errval; 1754 } 1755 1756 /* Setup Rx Queues */ 1757 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) { 1758 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id]; 1759 1760 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, 1761 bd_rx_q->nb_rx_desc, 1762 rte_eth_dev_socket_id(slave_eth_dev->data->port_id), 1763 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool); 1764 if (errval != 0) { 1765 RTE_BOND_LOG(ERR, 1766 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)", 1767 slave_eth_dev->data->port_id, q_id, errval); 1768 return errval; 1769 } 1770 } 1771 1772 /* Setup Tx Queues */ 1773 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) { 1774 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id]; 1775 1776 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, 1777 bd_tx_q->nb_tx_desc, 1778 rte_eth_dev_socket_id(slave_eth_dev->data->port_id), 1779 &bd_tx_q->tx_conf); 1780 if (errval != 0) { 1781 RTE_BOND_LOG(ERR, 1782 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", 1783 slave_eth_dev->data->port_id, q_id, errval); 1784 return errval; 1785 } 1786 } 1787 1788 if (internals->mode == BONDING_MODE_8023AD && 1789 internals->mode4.dedicated_queues.enabled == 1) { 1790 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev) 1791 != 0) 1792 return errval; 1793 1794 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev, 1795 slave_eth_dev->data->port_id) != 0) { 1796 RTE_BOND_LOG(ERR, 1797 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", 1798 slave_eth_dev->data->port_id, q_id, errval); 1799 return -1; 1800 } 1801 1802 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL) 1803 rte_flow_destroy(slave_eth_dev->data->port_id, 1804 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id], 1805 &flow_error); 1806 1807 bond_ethdev_8023ad_flow_set(bonded_eth_dev, 1808 slave_eth_dev->data->port_id); 1809 } 1810 1811 /* Start device */ 1812 errval = rte_eth_dev_start(slave_eth_dev->data->port_id); 1813 if (errval != 0) { 1814 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)", 1815 slave_eth_dev->data->port_id, errval); 1816 return -1; 1817 } 1818 1819 /* If RSS is enabled for bonding, synchronize RETA */ 1820 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) { 1821 int i; 1822 struct bond_dev_private *internals; 1823 1824 internals = bonded_eth_dev->data->dev_private; 1825 1826 for (i = 0; i < internals->slave_count; i++) { 1827 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) { 1828 errval = rte_eth_dev_rss_reta_update( 1829 slave_eth_dev->data->port_id, 1830 &internals->reta_conf[0], 1831 internals->slaves[i].reta_size); 1832 if (errval != 0) { 1833 RTE_BOND_LOG(WARNING, 1834 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)." 
1835 " RSS Configuration for bonding may be inconsistent.", 1836 slave_eth_dev->data->port_id, errval); 1837 } 1838 break; 1839 } 1840 } 1841 } 1842 1843 /* If lsc interrupt is set, check initial slave's link status */ 1844 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) { 1845 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0); 1846 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id, 1847 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id, 1848 NULL); 1849 } 1850 1851 return 0; 1852 } 1853 1854 void 1855 slave_remove(struct bond_dev_private *internals, 1856 struct rte_eth_dev *slave_eth_dev) 1857 { 1858 uint16_t i; 1859 1860 for (i = 0; i < internals->slave_count; i++) 1861 if (internals->slaves[i].port_id == 1862 slave_eth_dev->data->port_id) 1863 break; 1864 1865 if (i < (internals->slave_count - 1)) { 1866 struct rte_flow *flow; 1867 1868 memmove(&internals->slaves[i], &internals->slaves[i + 1], 1869 sizeof(internals->slaves[0]) * 1870 (internals->slave_count - i - 1)); 1871 TAILQ_FOREACH(flow, &internals->flow_list, next) { 1872 memmove(&flow->flows[i], &flow->flows[i + 1], 1873 sizeof(flow->flows[0]) * 1874 (internals->slave_count - i - 1)); 1875 flow->flows[internals->slave_count - 1] = NULL; 1876 } 1877 } 1878 1879 internals->slave_count--; 1880 1881 /* force reconfiguration of slave interfaces */ 1882 _rte_eth_dev_reset(slave_eth_dev); 1883 } 1884 1885 static void 1886 bond_ethdev_slave_link_status_change_monitor(void *cb_arg); 1887 1888 void 1889 slave_add(struct bond_dev_private *internals, 1890 struct rte_eth_dev *slave_eth_dev) 1891 { 1892 struct bond_slave_details *slave_details = 1893 &internals->slaves[internals->slave_count]; 1894 1895 slave_details->port_id = slave_eth_dev->data->port_id; 1896 slave_details->last_link_status = 0; 1897 1898 /* Mark slave devices that don't support interrupts so we can 1899 * compensate when we start the bond 1900 */ 1901 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) { 1902 slave_details->link_status_poll_enabled = 1; 1903 } 1904 1905 slave_details->link_status_wait_to_complete = 0; 1906 /* clean tlb_last_obytes when adding port for bonding device */ 1907 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs, 1908 sizeof(struct rte_ether_addr)); 1909 } 1910 1911 void 1912 bond_ethdev_primary_set(struct bond_dev_private *internals, 1913 uint16_t slave_port_id) 1914 { 1915 int i; 1916 1917 if (internals->active_slave_count < 1) 1918 internals->current_primary_port = slave_port_id; 1919 else 1920 /* Search bonded device slave ports for new proposed primary port */ 1921 for (i = 0; i < internals->active_slave_count; i++) { 1922 if (internals->active_slaves[i] == slave_port_id) 1923 internals->current_primary_port = slave_port_id; 1924 } 1925 } 1926 1927 static int 1928 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev); 1929 1930 static int 1931 bond_ethdev_start(struct rte_eth_dev *eth_dev) 1932 { 1933 struct bond_dev_private *internals; 1934 int i; 1935 1936 /* slave eth dev will be started by bonded device */ 1937 if (check_for_bonded_ethdev(eth_dev)) { 1938 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)", 1939 eth_dev->data->port_id); 1940 return -1; 1941 } 1942 1943 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; 1944 eth_dev->data->dev_started = 1; 1945 1946 internals = eth_dev->data->dev_private; 1947 1948 if (internals->slave_count == 0) { 1949 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices"); 1950 goto out_err; 
1951 } 1952 1953 if (internals->user_defined_mac == 0) { 1954 struct rte_ether_addr *new_mac_addr = NULL; 1955 1956 for (i = 0; i < internals->slave_count; i++) 1957 if (internals->slaves[i].port_id == internals->primary_port) 1958 new_mac_addr = &internals->slaves[i].persisted_mac_addr; 1959 1960 if (new_mac_addr == NULL) 1961 goto out_err; 1962 1963 if (mac_address_set(eth_dev, new_mac_addr) != 0) { 1964 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address", 1965 eth_dev->data->port_id); 1966 goto out_err; 1967 } 1968 } 1969 1970 if (internals->mode == BONDING_MODE_8023AD) { 1971 if (internals->mode4.dedicated_queues.enabled == 1) { 1972 internals->mode4.dedicated_queues.rx_qid = 1973 eth_dev->data->nb_rx_queues; 1974 internals->mode4.dedicated_queues.tx_qid = 1975 eth_dev->data->nb_tx_queues; 1976 } 1977 } 1978 1979 1980 /* Reconfigure each slave device if starting bonded device */ 1981 for (i = 0; i < internals->slave_count; i++) { 1982 struct rte_eth_dev *slave_ethdev = 1983 &(rte_eth_devices[internals->slaves[i].port_id]); 1984 if (slave_configure(eth_dev, slave_ethdev) != 0) { 1985 RTE_BOND_LOG(ERR, 1986 "bonded port (%d) failed to reconfigure slave device (%d)", 1987 eth_dev->data->port_id, 1988 internals->slaves[i].port_id); 1989 goto out_err; 1990 } 1991 /* We will need to poll for link status if any slave doesn't 1992 * support interrupts 1993 */ 1994 if (internals->slaves[i].link_status_poll_enabled) 1995 internals->link_status_polling_enabled = 1; 1996 } 1997 1998 /* start polling if needed */ 1999 if (internals->link_status_polling_enabled) { 2000 rte_eal_alarm_set( 2001 internals->link_status_polling_interval_ms * 1000, 2002 bond_ethdev_slave_link_status_change_monitor, 2003 (void *)&rte_eth_devices[internals->port_id]); 2004 } 2005 2006 /* Update all slave devices MACs*/ 2007 if (mac_address_slaves_update(eth_dev) != 0) 2008 goto out_err; 2009 2010 if (internals->user_defined_primary_port) 2011 bond_ethdev_primary_set(internals, internals->primary_port); 2012 2013 if (internals->mode == BONDING_MODE_8023AD) 2014 bond_mode_8023ad_start(eth_dev); 2015 2016 if (internals->mode == BONDING_MODE_TLB || 2017 internals->mode == BONDING_MODE_ALB) 2018 bond_tlb_enable(internals); 2019 2020 return 0; 2021 2022 out_err: 2023 eth_dev->data->dev_started = 0; 2024 return -1; 2025 } 2026 2027 static void 2028 bond_ethdev_free_queues(struct rte_eth_dev *dev) 2029 { 2030 uint16_t i; 2031 2032 if (dev->data->rx_queues != NULL) { 2033 for (i = 0; i < dev->data->nb_rx_queues; i++) { 2034 rte_free(dev->data->rx_queues[i]); 2035 dev->data->rx_queues[i] = NULL; 2036 } 2037 dev->data->nb_rx_queues = 0; 2038 } 2039 2040 if (dev->data->tx_queues != NULL) { 2041 for (i = 0; i < dev->data->nb_tx_queues; i++) { 2042 rte_free(dev->data->tx_queues[i]); 2043 dev->data->tx_queues[i] = NULL; 2044 } 2045 dev->data->nb_tx_queues = 0; 2046 } 2047 } 2048 2049 void 2050 bond_ethdev_stop(struct rte_eth_dev *eth_dev) 2051 { 2052 struct bond_dev_private *internals = eth_dev->data->dev_private; 2053 uint16_t i; 2054 2055 if (internals->mode == BONDING_MODE_8023AD) { 2056 struct port *port; 2057 void *pkt = NULL; 2058 2059 bond_mode_8023ad_stop(eth_dev); 2060 2061 /* Discard all messages to/from mode 4 state machines */ 2062 for (i = 0; i < internals->active_slave_count; i++) { 2063 port = &bond_mode_8023ad_ports[internals->active_slaves[i]]; 2064 2065 RTE_ASSERT(port->rx_ring != NULL); 2066 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT) 2067 rte_pktmbuf_free(pkt); 2068 2069 
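			/* rte_ring_dequeue() returns -ENOENT once the ring is
			 * empty, so these loops free any mbufs still queued
			 * to or from the mode 4 state machine. */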
RTE_ASSERT(port->tx_ring != NULL); 2070 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT) 2071 rte_pktmbuf_free(pkt); 2072 } 2073 } 2074 2075 if (internals->mode == BONDING_MODE_TLB || 2076 internals->mode == BONDING_MODE_ALB) { 2077 bond_tlb_disable(internals); 2078 for (i = 0; i < internals->active_slave_count; i++) 2079 tlb_last_obytets[internals->active_slaves[i]] = 0; 2080 } 2081 2082 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; 2083 eth_dev->data->dev_started = 0; 2084 2085 internals->link_status_polling_enabled = 0; 2086 for (i = 0; i < internals->slave_count; i++) { 2087 uint16_t slave_id = internals->slaves[i].port_id; 2088 if (find_slave_by_id(internals->active_slaves, 2089 internals->active_slave_count, slave_id) != 2090 internals->active_slave_count) { 2091 internals->slaves[i].last_link_status = 0; 2092 rte_eth_dev_stop(slave_id); 2093 deactivate_slave(eth_dev, slave_id); 2094 } 2095 } 2096 } 2097 2098 void 2099 bond_ethdev_close(struct rte_eth_dev *dev) 2100 { 2101 struct bond_dev_private *internals = dev->data->dev_private; 2102 uint16_t bond_port_id = internals->port_id; 2103 int skipped = 0; 2104 struct rte_flow_error ferror; 2105 2106 RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name); 2107 while (internals->slave_count != skipped) { 2108 uint16_t port_id = internals->slaves[skipped].port_id; 2109 2110 rte_eth_dev_stop(port_id); 2111 2112 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) { 2113 RTE_BOND_LOG(ERR, 2114 "Failed to remove port %d from bonded device %s", 2115 port_id, dev->device->name); 2116 skipped++; 2117 } 2118 } 2119 bond_flow_ops.flush(dev, &ferror); 2120 bond_ethdev_free_queues(dev); 2121 rte_bitmap_reset(internals->vlan_filter_bmp); 2122 } 2123 2124 /* forward declaration */ 2125 static int bond_ethdev_configure(struct rte_eth_dev *dev); 2126 2127 static int 2128 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) 2129 { 2130 struct bond_dev_private *internals = dev->data->dev_private; 2131 struct bond_slave_details slave; 2132 int ret; 2133 2134 uint16_t max_nb_rx_queues = UINT16_MAX; 2135 uint16_t max_nb_tx_queues = UINT16_MAX; 2136 uint16_t max_rx_desc_lim = UINT16_MAX; 2137 uint16_t max_tx_desc_lim = UINT16_MAX; 2138 2139 dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS; 2140 2141 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ? 2142 internals->candidate_max_rx_pktlen : 2143 RTE_ETHER_MAX_JUMBO_FRAME_LEN; 2144 2145 /* Max number of tx/rx queues that the bonded device can support is the 2146 * minimum values of the bonded slaves, as all slaves must be capable 2147 * of supporting the same number of tx/rx queues. 
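	 * For example, if one slave reports 16 max Rx queues and another
	 * reports 8, the bonded device advertises 8.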
2148 */ 2149 if (internals->slave_count > 0) { 2150 struct rte_eth_dev_info slave_info; 2151 uint16_t idx; 2152 2153 for (idx = 0; idx < internals->slave_count; idx++) { 2154 slave = internals->slaves[idx]; 2155 ret = rte_eth_dev_info_get(slave.port_id, &slave_info); 2156 if (ret != 0) { 2157 RTE_BOND_LOG(ERR, 2158 "%s: Error during getting device (port %u) info: %s\n", 2159 __func__, 2160 slave.port_id, 2161 strerror(-ret)); 2162 2163 return ret; 2164 } 2165 2166 if (slave_info.max_rx_queues < max_nb_rx_queues) 2167 max_nb_rx_queues = slave_info.max_rx_queues; 2168 2169 if (slave_info.max_tx_queues < max_nb_tx_queues) 2170 max_nb_tx_queues = slave_info.max_tx_queues; 2171 2172 if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim) 2173 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max; 2174 2175 if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim) 2176 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max; 2177 } 2178 } 2179 2180 dev_info->max_rx_queues = max_nb_rx_queues; 2181 dev_info->max_tx_queues = max_nb_tx_queues; 2182 2183 memcpy(&dev_info->default_rxconf, &internals->default_rxconf, 2184 sizeof(dev_info->default_rxconf)); 2185 memcpy(&dev_info->default_txconf, &internals->default_txconf, 2186 sizeof(dev_info->default_txconf)); 2187 2188 dev_info->rx_desc_lim.nb_max = max_rx_desc_lim; 2189 dev_info->tx_desc_lim.nb_max = max_tx_desc_lim; 2190 2191 /** 2192 * If dedicated hw queues enabled for link bonding device in LACP mode 2193 * then we need to reduce the maximum number of data path queues by 1. 2194 */ 2195 if (internals->mode == BONDING_MODE_8023AD && 2196 internals->mode4.dedicated_queues.enabled == 1) { 2197 dev_info->max_rx_queues--; 2198 dev_info->max_tx_queues--; 2199 } 2200 2201 dev_info->min_rx_bufsize = 0; 2202 2203 dev_info->rx_offload_capa = internals->rx_offload_capa; 2204 dev_info->tx_offload_capa = internals->tx_offload_capa; 2205 dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa; 2206 dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa; 2207 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads; 2208 2209 dev_info->reta_size = internals->reta_size; 2210 2211 return 0; 2212 } 2213 2214 static int 2215 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) 2216 { 2217 int res; 2218 uint16_t i; 2219 struct bond_dev_private *internals = dev->data->dev_private; 2220 2221 /* don't do this while a slave is being added */ 2222 rte_spinlock_lock(&internals->lock); 2223 2224 if (on) 2225 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id); 2226 else 2227 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id); 2228 2229 for (i = 0; i < internals->slave_count; i++) { 2230 uint16_t port_id = internals->slaves[i].port_id; 2231 2232 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on); 2233 if (res == ENOTSUP) 2234 RTE_BOND_LOG(WARNING, 2235 "Setting VLAN filter on slave port %u not supported.", 2236 port_id); 2237 } 2238 2239 rte_spinlock_unlock(&internals->lock); 2240 return 0; 2241 } 2242 2243 static int 2244 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, 2245 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused, 2246 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool) 2247 { 2248 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *) 2249 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue), 2250 0, dev->data->numa_node); 2251 if (bd_rx_q == NULL) 2252 return -1; 2253 2254 bd_rx_q->queue_id = rx_queue_id; 2255 bd_rx_q->dev_private = dev->data->dev_private; 2256 2257 
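	/* Note: the bonded queue only records the requested configuration;
	 * it is expected to be applied to each slave's real queues when the
	 * slaves are (re)configured during device start. */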
bd_rx_q->nb_rx_desc = nb_rx_desc; 2258 2259 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf)); 2260 bd_rx_q->mb_pool = mb_pool; 2261 2262 dev->data->rx_queues[rx_queue_id] = bd_rx_q; 2263 2264 return 0; 2265 } 2266 2267 static int 2268 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, 2269 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused, 2270 const struct rte_eth_txconf *tx_conf) 2271 { 2272 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *) 2273 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue), 2274 0, dev->data->numa_node); 2275 2276 if (bd_tx_q == NULL) 2277 return -1; 2278 2279 bd_tx_q->queue_id = tx_queue_id; 2280 bd_tx_q->dev_private = dev->data->dev_private; 2281 2282 bd_tx_q->nb_tx_desc = nb_tx_desc; 2283 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf)); 2284 2285 dev->data->tx_queues[tx_queue_id] = bd_tx_q; 2286 2287 return 0; 2288 } 2289 2290 static void 2291 bond_ethdev_rx_queue_release(void *queue) 2292 { 2293 if (queue == NULL) 2294 return; 2295 2296 rte_free(queue); 2297 } 2298 2299 static void 2300 bond_ethdev_tx_queue_release(void *queue) 2301 { 2302 if (queue == NULL) 2303 return; 2304 2305 rte_free(queue); 2306 } 2307 2308 static void 2309 bond_ethdev_slave_link_status_change_monitor(void *cb_arg) 2310 { 2311 struct rte_eth_dev *bonded_ethdev, *slave_ethdev; 2312 struct bond_dev_private *internals; 2313 2314 /* Default value for polling slave found is true as we don't want to 2315 * disable the polling thread if we cannot get the lock */ 2316 int i, polling_slave_found = 1; 2317 2318 if (cb_arg == NULL) 2319 return; 2320 2321 bonded_ethdev = cb_arg; 2322 internals = bonded_ethdev->data->dev_private; 2323 2324 if (!bonded_ethdev->data->dev_started || 2325 !internals->link_status_polling_enabled) 2326 return; 2327 2328 /* If device is currently being configured then don't check slaves link 2329 * status, wait until next period */ 2330 if (rte_spinlock_trylock(&internals->lock)) { 2331 if (internals->slave_count > 0) 2332 polling_slave_found = 0; 2333 2334 for (i = 0; i < internals->slave_count; i++) { 2335 if (!internals->slaves[i].link_status_poll_enabled) 2336 continue; 2337 2338 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id]; 2339 polling_slave_found = 1; 2340 2341 /* Update slave link status */ 2342 (*slave_ethdev->dev_ops->link_update)(slave_ethdev, 2343 internals->slaves[i].link_status_wait_to_complete); 2344 2345 /* if link status has changed since last checked then call lsc 2346 * event callback */ 2347 if (slave_ethdev->data->dev_link.link_status != 2348 internals->slaves[i].last_link_status) { 2349 internals->slaves[i].last_link_status = 2350 slave_ethdev->data->dev_link.link_status; 2351 2352 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id, 2353 RTE_ETH_EVENT_INTR_LSC, 2354 &bonded_ethdev->data->port_id, 2355 NULL); 2356 } 2357 } 2358 rte_spinlock_unlock(&internals->lock); 2359 } 2360 2361 if (polling_slave_found) 2362 /* Set alarm to continue monitoring link status of slave ethdev's */ 2363 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000, 2364 bond_ethdev_slave_link_status_change_monitor, cb_arg); 2365 } 2366 2367 static int 2368 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete) 2369 { 2370 int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link); 2371 2372 struct bond_dev_private *bond_ctx; 2373 struct rte_eth_link slave_link; 2374 2375 bool one_link_update_succeeded; 2376 uint32_t idx; 2377 int ret; 2378 
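	/* The reported link speed depends on the bonding mode: broadcast uses
	 * the minimum speed of the active slaves, active-backup uses the
	 * primary slave, and the remaining modes report the sum of the active
	 * slaves (see the switch statement below). */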
2379 bond_ctx = ethdev->data->dev_private; 2380 2381 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE; 2382 2383 if (ethdev->data->dev_started == 0 || 2384 bond_ctx->active_slave_count == 0) { 2385 ethdev->data->dev_link.link_status = ETH_LINK_DOWN; 2386 return 0; 2387 } 2388 2389 ethdev->data->dev_link.link_status = ETH_LINK_UP; 2390 2391 if (wait_to_complete) 2392 link_update = rte_eth_link_get; 2393 else 2394 link_update = rte_eth_link_get_nowait; 2395 2396 switch (bond_ctx->mode) { 2397 case BONDING_MODE_BROADCAST: 2398 /** 2399 * Setting link speed to UINT32_MAX to ensure we pick up the 2400 * value of the first active slave 2401 */ 2402 ethdev->data->dev_link.link_speed = UINT32_MAX; 2403 2404 /** 2405 * link speed is minimum value of all the slaves link speed as 2406 * packet loss will occur on this slave if transmission at rates 2407 * greater than this are attempted 2408 */ 2409 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) { 2410 ret = link_update(bond_ctx->active_slaves[idx], 2411 &slave_link); 2412 if (ret < 0) { 2413 ethdev->data->dev_link.link_speed = 2414 ETH_SPEED_NUM_NONE; 2415 RTE_BOND_LOG(ERR, 2416 "Slave (port %u) link get failed: %s", 2417 bond_ctx->active_slaves[idx], 2418 rte_strerror(-ret)); 2419 return 0; 2420 } 2421 2422 if (slave_link.link_speed < 2423 ethdev->data->dev_link.link_speed) 2424 ethdev->data->dev_link.link_speed = 2425 slave_link.link_speed; 2426 } 2427 break; 2428 case BONDING_MODE_ACTIVE_BACKUP: 2429 /* Current primary slave */ 2430 ret = link_update(bond_ctx->current_primary_port, &slave_link); 2431 if (ret < 0) { 2432 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s", 2433 bond_ctx->current_primary_port, 2434 rte_strerror(-ret)); 2435 return 0; 2436 } 2437 2438 ethdev->data->dev_link.link_speed = slave_link.link_speed; 2439 break; 2440 case BONDING_MODE_8023AD: 2441 ethdev->data->dev_link.link_autoneg = 2442 bond_ctx->mode4.slave_link.link_autoneg; 2443 ethdev->data->dev_link.link_duplex = 2444 bond_ctx->mode4.slave_link.link_duplex; 2445 /* fall through */ 2446 /* to update link speed */ 2447 case BONDING_MODE_ROUND_ROBIN: 2448 case BONDING_MODE_BALANCE: 2449 case BONDING_MODE_TLB: 2450 case BONDING_MODE_ALB: 2451 default: 2452 /** 2453 * In theses mode the maximum theoretical link speed is the sum 2454 * of all the slaves 2455 */ 2456 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE; 2457 one_link_update_succeeded = false; 2458 2459 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) { 2460 ret = link_update(bond_ctx->active_slaves[idx], 2461 &slave_link); 2462 if (ret < 0) { 2463 RTE_BOND_LOG(ERR, 2464 "Slave (port %u) link get failed: %s", 2465 bond_ctx->active_slaves[idx], 2466 rte_strerror(-ret)); 2467 continue; 2468 } 2469 2470 one_link_update_succeeded = true; 2471 ethdev->data->dev_link.link_speed += 2472 slave_link.link_speed; 2473 } 2474 2475 if (!one_link_update_succeeded) { 2476 RTE_BOND_LOG(ERR, "All slaves link get failed"); 2477 return 0; 2478 } 2479 } 2480 2481 2482 return 0; 2483 } 2484 2485 2486 static int 2487 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) 2488 { 2489 struct bond_dev_private *internals = dev->data->dev_private; 2490 struct rte_eth_stats slave_stats; 2491 int i, j; 2492 2493 for (i = 0; i < internals->slave_count; i++) { 2494 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats); 2495 2496 stats->ipackets += slave_stats.ipackets; 2497 stats->opackets += slave_stats.opackets; 2498 stats->ibytes += slave_stats.ibytes; 2499 stats->obytes += 
slave_stats.obytes;
		stats->imissed += slave_stats.imissed;
		stats->ierrors += slave_stats.ierrors;
		stats->oerrors += slave_stats.oerrors;
		stats->rx_nombuf += slave_stats.rx_nombuf;

		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
			stats->q_opackets[j] += slave_stats.q_opackets[j];
			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
			stats->q_obytes[j] += slave_stats.q_obytes[j];
			stats->q_errors[j] += slave_stats.q_errors[j];
		}

	}

	return 0;
}

static int
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;
	int err;
	int ret;

	for (i = 0, err = 0; i < internals->slave_count; i++) {
		ret = rte_eth_stats_reset(internals->slaves[i].port_id);
		if (ret != 0)
			err = ret;
	}

	return err;
}

static int
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;
	int ret = 0;
	uint16_t port_id;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
	case BONDING_MODE_8023AD: {
		unsigned int slave_ok = 0;

		for (i = 0; i < internals->slave_count; i++) {
			port_id = internals->slaves[i].port_id;

			ret = rte_eth_promiscuous_enable(port_id);
			if (ret != 0)
				RTE_BOND_LOG(ERR,
					"Failed to enable promiscuous mode for port %u: %s",
					port_id, rte_strerror(-ret));
			else
				slave_ok++;
		}
		/*
		 * Report success if the operation succeeded on at least
		 * one slave. Otherwise return the last error code.
2565 */ 2566 if (slave_ok > 0) 2567 ret = 0; 2568 break; 2569 } 2570 /* Promiscuous mode is propagated only to primary slave */ 2571 case BONDING_MODE_ACTIVE_BACKUP: 2572 case BONDING_MODE_TLB: 2573 case BONDING_MODE_ALB: 2574 default: 2575 /* Do not touch promisc when there cannot be primary ports */ 2576 if (internals->slave_count == 0) 2577 break; 2578 port_id = internals->current_primary_port; 2579 ret = rte_eth_promiscuous_enable(port_id); 2580 if (ret != 0) 2581 RTE_BOND_LOG(ERR, 2582 "Failed to enable promiscuous mode for port %u: %s", 2583 port_id, rte_strerror(-ret)); 2584 } 2585 2586 return ret; 2587 } 2588 2589 static int 2590 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev) 2591 { 2592 struct bond_dev_private *internals = dev->data->dev_private; 2593 int i; 2594 int ret = 0; 2595 uint16_t port_id; 2596 2597 switch (internals->mode) { 2598 /* Promiscuous mode is propagated to all slaves */ 2599 case BONDING_MODE_ROUND_ROBIN: 2600 case BONDING_MODE_BALANCE: 2601 case BONDING_MODE_BROADCAST: 2602 case BONDING_MODE_8023AD: { 2603 unsigned int slave_ok = 0; 2604 2605 for (i = 0; i < internals->slave_count; i++) { 2606 port_id = internals->slaves[i].port_id; 2607 2608 if (internals->mode == BONDING_MODE_8023AD && 2609 bond_mode_8023ad_ports[port_id].forced_rx_flags == 2610 BOND_8023AD_FORCED_PROMISC) { 2611 slave_ok++; 2612 continue; 2613 } 2614 ret = rte_eth_promiscuous_disable(port_id); 2615 if (ret != 0) 2616 RTE_BOND_LOG(ERR, 2617 "Failed to disable promiscuous mode for port %u: %s", 2618 port_id, rte_strerror(-ret)); 2619 else 2620 slave_ok++; 2621 } 2622 /* 2623 * Report success if operation is successful on at least 2624 * on one slave. Otherwise return last error code. 2625 */ 2626 if (slave_ok > 0) 2627 ret = 0; 2628 break; 2629 } 2630 /* Promiscuous mode is propagated only to primary slave */ 2631 case BONDING_MODE_ACTIVE_BACKUP: 2632 case BONDING_MODE_TLB: 2633 case BONDING_MODE_ALB: 2634 default: 2635 /* Do not touch promisc when there cannot be primary ports */ 2636 if (internals->slave_count == 0) 2637 break; 2638 port_id = internals->current_primary_port; 2639 ret = rte_eth_promiscuous_disable(port_id); 2640 if (ret != 0) 2641 RTE_BOND_LOG(ERR, 2642 "Failed to disable promiscuous mode for port %u: %s", 2643 port_id, rte_strerror(-ret)); 2644 } 2645 2646 return ret; 2647 } 2648 2649 static int 2650 bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev) 2651 { 2652 struct bond_dev_private *internals = eth_dev->data->dev_private; 2653 int i; 2654 int ret = 0; 2655 uint16_t port_id; 2656 2657 switch (internals->mode) { 2658 /* allmulti mode is propagated to all slaves */ 2659 case BONDING_MODE_ROUND_ROBIN: 2660 case BONDING_MODE_BALANCE: 2661 case BONDING_MODE_BROADCAST: 2662 case BONDING_MODE_8023AD: { 2663 unsigned int slave_ok = 0; 2664 2665 for (i = 0; i < internals->slave_count; i++) { 2666 port_id = internals->slaves[i].port_id; 2667 2668 ret = rte_eth_allmulticast_enable(port_id); 2669 if (ret != 0) 2670 RTE_BOND_LOG(ERR, 2671 "Failed to enable allmulti mode for port %u: %s", 2672 port_id, rte_strerror(-ret)); 2673 else 2674 slave_ok++; 2675 } 2676 /* 2677 * Report success if operation is successful on at least 2678 * on one slave. Otherwise return last error code. 
2679 */ 2680 if (slave_ok > 0) 2681 ret = 0; 2682 break; 2683 } 2684 /* allmulti mode is propagated only to primary slave */ 2685 case BONDING_MODE_ACTIVE_BACKUP: 2686 case BONDING_MODE_TLB: 2687 case BONDING_MODE_ALB: 2688 default: 2689 /* Do not touch allmulti when there cannot be primary ports */ 2690 if (internals->slave_count == 0) 2691 break; 2692 port_id = internals->current_primary_port; 2693 ret = rte_eth_allmulticast_enable(port_id); 2694 if (ret != 0) 2695 RTE_BOND_LOG(ERR, 2696 "Failed to enable allmulti mode for port %u: %s", 2697 port_id, rte_strerror(-ret)); 2698 } 2699 2700 return ret; 2701 } 2702 2703 static int 2704 bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev) 2705 { 2706 struct bond_dev_private *internals = eth_dev->data->dev_private; 2707 int i; 2708 int ret = 0; 2709 uint16_t port_id; 2710 2711 switch (internals->mode) { 2712 /* allmulti mode is propagated to all slaves */ 2713 case BONDING_MODE_ROUND_ROBIN: 2714 case BONDING_MODE_BALANCE: 2715 case BONDING_MODE_BROADCAST: 2716 case BONDING_MODE_8023AD: { 2717 unsigned int slave_ok = 0; 2718 2719 for (i = 0; i < internals->slave_count; i++) { 2720 uint16_t port_id = internals->slaves[i].port_id; 2721 2722 if (internals->mode == BONDING_MODE_8023AD && 2723 bond_mode_8023ad_ports[port_id].forced_rx_flags == 2724 BOND_8023AD_FORCED_ALLMULTI) 2725 continue; 2726 2727 ret = rte_eth_allmulticast_disable(port_id); 2728 if (ret != 0) 2729 RTE_BOND_LOG(ERR, 2730 "Failed to disable allmulti mode for port %u: %s", 2731 port_id, rte_strerror(-ret)); 2732 else 2733 slave_ok++; 2734 } 2735 /* 2736 * Report success if operation is successful on at least 2737 * on one slave. Otherwise return last error code. 2738 */ 2739 if (slave_ok > 0) 2740 ret = 0; 2741 break; 2742 } 2743 /* allmulti mode is propagated only to primary slave */ 2744 case BONDING_MODE_ACTIVE_BACKUP: 2745 case BONDING_MODE_TLB: 2746 case BONDING_MODE_ALB: 2747 default: 2748 /* Do not touch allmulti when there cannot be primary ports */ 2749 if (internals->slave_count == 0) 2750 break; 2751 port_id = internals->current_primary_port; 2752 ret = rte_eth_allmulticast_disable(port_id); 2753 if (ret != 0) 2754 RTE_BOND_LOG(ERR, 2755 "Failed to disable allmulti mode for port %u: %s", 2756 port_id, rte_strerror(-ret)); 2757 } 2758 2759 return ret; 2760 } 2761 2762 static void 2763 bond_ethdev_delayed_lsc_propagation(void *arg) 2764 { 2765 if (arg == NULL) 2766 return; 2767 2768 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg, 2769 RTE_ETH_EVENT_INTR_LSC, NULL); 2770 } 2771 2772 int 2773 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type, 2774 void *param, void *ret_param __rte_unused) 2775 { 2776 struct rte_eth_dev *bonded_eth_dev; 2777 struct bond_dev_private *internals; 2778 struct rte_eth_link link; 2779 int rc = -1; 2780 int ret; 2781 2782 uint8_t lsc_flag = 0; 2783 int valid_slave = 0; 2784 uint16_t active_pos; 2785 uint16_t i; 2786 2787 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL) 2788 return rc; 2789 2790 bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param]; 2791 2792 if (check_for_bonded_ethdev(bonded_eth_dev)) 2793 return rc; 2794 2795 internals = bonded_eth_dev->data->dev_private; 2796 2797 /* If the device isn't started don't handle interrupts */ 2798 if (!bonded_eth_dev->data->dev_started) 2799 return rc; 2800 2801 /* verify that port_id is a valid slave of bonded port */ 2802 for (i = 0; i < internals->slave_count; i++) { 2803 if (internals->slaves[i].port_id == port_id) { 2804 valid_slave = 1; 2805 
break; 2806 } 2807 } 2808 2809 if (!valid_slave) 2810 return rc; 2811 2812 /* Synchronize lsc callback parallel calls either by real link event 2813 * from the slaves PMDs or by the bonding PMD itself. 2814 */ 2815 rte_spinlock_lock(&internals->lsc_lock); 2816 2817 /* Search for port in active port list */ 2818 active_pos = find_slave_by_id(internals->active_slaves, 2819 internals->active_slave_count, port_id); 2820 2821 ret = rte_eth_link_get_nowait(port_id, &link); 2822 if (ret < 0) 2823 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id); 2824 2825 if (ret == 0 && link.link_status) { 2826 if (active_pos < internals->active_slave_count) 2827 goto link_update; 2828 2829 /* check link state properties if bonded link is up*/ 2830 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) { 2831 if (link_properties_valid(bonded_eth_dev, &link) != 0) 2832 RTE_BOND_LOG(ERR, "Invalid link properties " 2833 "for slave %d in bonding mode %d", 2834 port_id, internals->mode); 2835 } else { 2836 /* inherit slave link properties */ 2837 link_properties_set(bonded_eth_dev, &link); 2838 } 2839 2840 /* If no active slave ports then set this port to be 2841 * the primary port. 2842 */ 2843 if (internals->active_slave_count < 1) { 2844 /* If first active slave, then change link status */ 2845 bonded_eth_dev->data->dev_link.link_status = 2846 ETH_LINK_UP; 2847 internals->current_primary_port = port_id; 2848 lsc_flag = 1; 2849 2850 mac_address_slaves_update(bonded_eth_dev); 2851 } 2852 2853 activate_slave(bonded_eth_dev, port_id); 2854 2855 /* If the user has defined the primary port then default to 2856 * using it. 2857 */ 2858 if (internals->user_defined_primary_port && 2859 internals->primary_port == port_id) 2860 bond_ethdev_primary_set(internals, port_id); 2861 } else { 2862 if (active_pos == internals->active_slave_count) 2863 goto link_update; 2864 2865 /* Remove from active slave list */ 2866 deactivate_slave(bonded_eth_dev, port_id); 2867 2868 if (internals->active_slave_count < 1) 2869 lsc_flag = 1; 2870 2871 /* Update primary id, take first active slave from list or if none 2872 * available set to -1 */ 2873 if (port_id == internals->current_primary_port) { 2874 if (internals->active_slave_count > 0) 2875 bond_ethdev_primary_set(internals, 2876 internals->active_slaves[0]); 2877 else 2878 internals->current_primary_port = internals->primary_port; 2879 mac_address_slaves_update(bonded_eth_dev); 2880 } 2881 } 2882 2883 link_update: 2884 /** 2885 * Update bonded device link properties after any change to active 2886 * slaves 2887 */ 2888 bond_ethdev_link_update(bonded_eth_dev, 0); 2889 2890 if (lsc_flag) { 2891 /* Cancel any possible outstanding interrupts if delays are enabled */ 2892 if (internals->link_up_delay_ms > 0 || 2893 internals->link_down_delay_ms > 0) 2894 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation, 2895 bonded_eth_dev); 2896 2897 if (bonded_eth_dev->data->dev_link.link_status) { 2898 if (internals->link_up_delay_ms > 0) 2899 rte_eal_alarm_set(internals->link_up_delay_ms * 1000, 2900 bond_ethdev_delayed_lsc_propagation, 2901 (void *)bonded_eth_dev); 2902 else 2903 _rte_eth_dev_callback_process(bonded_eth_dev, 2904 RTE_ETH_EVENT_INTR_LSC, 2905 NULL); 2906 2907 } else { 2908 if (internals->link_down_delay_ms > 0) 2909 rte_eal_alarm_set(internals->link_down_delay_ms * 1000, 2910 bond_ethdev_delayed_lsc_propagation, 2911 (void *)bonded_eth_dev); 2912 else 2913 _rte_eth_dev_callback_process(bonded_eth_dev, 2914 RTE_ETH_EVENT_INTR_LSC, 2915 NULL); 2916 } 2917 } 2918 
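	/* The up/down propagation delays used above are configurable from the
	 * application, e.g. (values in milliseconds are illustrative):
	 *
	 *   rte_eth_bond_link_up_prop_delay_set(bond_port_id, 500);
	 *   rte_eth_bond_link_down_prop_delay_set(bond_port_id, 1000);
	 */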
2919 rte_spinlock_unlock(&internals->lsc_lock); 2920 2921 return rc; 2922 } 2923 2924 static int 2925 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev, 2926 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) 2927 { 2928 unsigned i, j; 2929 int result = 0; 2930 int slave_reta_size; 2931 unsigned reta_count; 2932 struct bond_dev_private *internals = dev->data->dev_private; 2933 2934 if (reta_size != internals->reta_size) 2935 return -EINVAL; 2936 2937 /* Copy RETA table */ 2938 reta_count = (reta_size + RTE_RETA_GROUP_SIZE - 1) / 2939 RTE_RETA_GROUP_SIZE; 2940 2941 for (i = 0; i < reta_count; i++) { 2942 internals->reta_conf[i].mask = reta_conf[i].mask; 2943 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) 2944 if ((reta_conf[i].mask >> j) & 0x01) 2945 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j]; 2946 } 2947 2948 /* Fill rest of array */ 2949 for (; i < RTE_DIM(internals->reta_conf); i += reta_count) 2950 memcpy(&internals->reta_conf[i], &internals->reta_conf[0], 2951 sizeof(internals->reta_conf[0]) * reta_count); 2952 2953 /* Propagate RETA over slaves */ 2954 for (i = 0; i < internals->slave_count; i++) { 2955 slave_reta_size = internals->slaves[i].reta_size; 2956 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id, 2957 &internals->reta_conf[0], slave_reta_size); 2958 if (result < 0) 2959 return result; 2960 } 2961 2962 return 0; 2963 } 2964 2965 static int 2966 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev, 2967 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) 2968 { 2969 int i, j; 2970 struct bond_dev_private *internals = dev->data->dev_private; 2971 2972 if (reta_size != internals->reta_size) 2973 return -EINVAL; 2974 2975 /* Copy RETA table */ 2976 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++) 2977 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) 2978 if ((reta_conf[i].mask >> j) & 0x01) 2979 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j]; 2980 2981 return 0; 2982 } 2983 2984 static int 2985 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev, 2986 struct rte_eth_rss_conf *rss_conf) 2987 { 2988 int i, result = 0; 2989 struct bond_dev_private *internals = dev->data->dev_private; 2990 struct rte_eth_rss_conf bond_rss_conf; 2991 2992 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf)); 2993 2994 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads; 2995 2996 if (bond_rss_conf.rss_hf != 0) 2997 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf; 2998 2999 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len < 3000 sizeof(internals->rss_key)) { 3001 if (bond_rss_conf.rss_key_len == 0) 3002 bond_rss_conf.rss_key_len = 40; 3003 internals->rss_key_len = bond_rss_conf.rss_key_len; 3004 memcpy(internals->rss_key, bond_rss_conf.rss_key, 3005 internals->rss_key_len); 3006 } 3007 3008 for (i = 0; i < internals->slave_count; i++) { 3009 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id, 3010 &bond_rss_conf); 3011 if (result < 0) 3012 return result; 3013 } 3014 3015 return 0; 3016 } 3017 3018 static int 3019 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev, 3020 struct rte_eth_rss_conf *rss_conf) 3021 { 3022 struct bond_dev_private *internals = dev->data->dev_private; 3023 3024 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 3025 rss_conf->rss_key_len = internals->rss_key_len; 3026 if (rss_conf->rss_key) 3027 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len); 3028 3029 return 0; 3030 } 3031 3032 static int 3033 bond_ethdev_mtu_set(struct 
rte_eth_dev *dev, uint16_t mtu) 3034 { 3035 struct rte_eth_dev *slave_eth_dev; 3036 struct bond_dev_private *internals = dev->data->dev_private; 3037 int ret, i; 3038 3039 rte_spinlock_lock(&internals->lock); 3040 3041 for (i = 0; i < internals->slave_count; i++) { 3042 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id]; 3043 if (*slave_eth_dev->dev_ops->mtu_set == NULL) { 3044 rte_spinlock_unlock(&internals->lock); 3045 return -ENOTSUP; 3046 } 3047 } 3048 for (i = 0; i < internals->slave_count; i++) { 3049 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu); 3050 if (ret < 0) { 3051 rte_spinlock_unlock(&internals->lock); 3052 return ret; 3053 } 3054 } 3055 3056 rte_spinlock_unlock(&internals->lock); 3057 return 0; 3058 } 3059 3060 static int 3061 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, 3062 struct rte_ether_addr *addr) 3063 { 3064 if (mac_address_set(dev, addr)) { 3065 RTE_BOND_LOG(ERR, "Failed to update MAC address"); 3066 return -EINVAL; 3067 } 3068 3069 return 0; 3070 } 3071 3072 static int 3073 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused, 3074 enum rte_filter_type type, enum rte_filter_op op, void *arg) 3075 { 3076 if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) { 3077 *(const void **)arg = &bond_flow_ops; 3078 return 0; 3079 } 3080 return -ENOTSUP; 3081 } 3082 3083 static int 3084 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, 3085 struct rte_ether_addr *mac_addr, 3086 __rte_unused uint32_t index, uint32_t vmdq) 3087 { 3088 struct rte_eth_dev *slave_eth_dev; 3089 struct bond_dev_private *internals = dev->data->dev_private; 3090 int ret, i; 3091 3092 rte_spinlock_lock(&internals->lock); 3093 3094 for (i = 0; i < internals->slave_count; i++) { 3095 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id]; 3096 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL || 3097 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) { 3098 ret = -ENOTSUP; 3099 goto end; 3100 } 3101 } 3102 3103 for (i = 0; i < internals->slave_count; i++) { 3104 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id, 3105 mac_addr, vmdq); 3106 if (ret < 0) { 3107 /* rollback */ 3108 for (i--; i >= 0; i--) 3109 rte_eth_dev_mac_addr_remove( 3110 internals->slaves[i].port_id, mac_addr); 3111 goto end; 3112 } 3113 } 3114 3115 ret = 0; 3116 end: 3117 rte_spinlock_unlock(&internals->lock); 3118 return ret; 3119 } 3120 3121 static void 3122 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index) 3123 { 3124 struct rte_eth_dev *slave_eth_dev; 3125 struct bond_dev_private *internals = dev->data->dev_private; 3126 int i; 3127 3128 rte_spinlock_lock(&internals->lock); 3129 3130 for (i = 0; i < internals->slave_count; i++) { 3131 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id]; 3132 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL) 3133 goto end; 3134 } 3135 3136 struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index]; 3137 3138 for (i = 0; i < internals->slave_count; i++) 3139 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id, 3140 mac_addr); 3141 3142 end: 3143 rte_spinlock_unlock(&internals->lock); 3144 } 3145 3146 const struct eth_dev_ops default_dev_ops = { 3147 .dev_start = bond_ethdev_start, 3148 .dev_stop = bond_ethdev_stop, 3149 .dev_close = bond_ethdev_close, 3150 .dev_configure = bond_ethdev_configure, 3151 .dev_infos_get = bond_ethdev_info, 3152 .vlan_filter_set = bond_ethdev_vlan_filter_set, 3153 .rx_queue_setup = bond_ethdev_rx_queue_setup, 3154 .tx_queue_setup = bond_ethdev_tx_queue_setup, 3155 
.rx_queue_release = bond_ethdev_rx_queue_release, 3156 .tx_queue_release = bond_ethdev_tx_queue_release, 3157 .link_update = bond_ethdev_link_update, 3158 .stats_get = bond_ethdev_stats_get, 3159 .stats_reset = bond_ethdev_stats_reset, 3160 .promiscuous_enable = bond_ethdev_promiscuous_enable, 3161 .promiscuous_disable = bond_ethdev_promiscuous_disable, 3162 .allmulticast_enable = bond_ethdev_allmulticast_enable, 3163 .allmulticast_disable = bond_ethdev_allmulticast_disable, 3164 .reta_update = bond_ethdev_rss_reta_update, 3165 .reta_query = bond_ethdev_rss_reta_query, 3166 .rss_hash_update = bond_ethdev_rss_hash_update, 3167 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get, 3168 .mtu_set = bond_ethdev_mtu_set, 3169 .mac_addr_set = bond_ethdev_mac_address_set, 3170 .mac_addr_add = bond_ethdev_mac_addr_add, 3171 .mac_addr_remove = bond_ethdev_mac_addr_remove, 3172 .filter_ctrl = bond_filter_ctrl 3173 }; 3174 3175 static int 3176 bond_alloc(struct rte_vdev_device *dev, uint8_t mode) 3177 { 3178 const char *name = rte_vdev_device_name(dev); 3179 uint8_t socket_id = dev->device.numa_node; 3180 struct bond_dev_private *internals = NULL; 3181 struct rte_eth_dev *eth_dev = NULL; 3182 uint32_t vlan_filter_bmp_size; 3183 3184 /* now do all data allocation - for eth_dev structure, dummy pci driver 3185 * and internal (private) data 3186 */ 3187 3188 /* reserve an ethdev entry */ 3189 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals)); 3190 if (eth_dev == NULL) { 3191 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev"); 3192 goto err; 3193 } 3194 3195 internals = eth_dev->data->dev_private; 3196 eth_dev->data->nb_rx_queues = (uint16_t)1; 3197 eth_dev->data->nb_tx_queues = (uint16_t)1; 3198 3199 /* Allocate memory for storing MAC addresses */ 3200 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN * 3201 BOND_MAX_MAC_ADDRS, 0, socket_id); 3202 if (eth_dev->data->mac_addrs == NULL) { 3203 RTE_BOND_LOG(ERR, 3204 "Failed to allocate %u bytes needed to store MAC addresses", 3205 RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS); 3206 goto err; 3207 } 3208 3209 eth_dev->dev_ops = &default_dev_ops; 3210 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC; 3211 3212 rte_spinlock_init(&internals->lock); 3213 rte_spinlock_init(&internals->lsc_lock); 3214 3215 internals->port_id = eth_dev->data->port_id; 3216 internals->mode = BONDING_MODE_INVALID; 3217 internals->current_primary_port = RTE_MAX_ETHPORTS + 1; 3218 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; 3219 internals->burst_xmit_hash = burst_xmit_l2_hash; 3220 internals->user_defined_mac = 0; 3221 3222 internals->link_status_polling_enabled = 0; 3223 3224 internals->link_status_polling_interval_ms = 3225 DEFAULT_POLLING_INTERVAL_10_MS; 3226 internals->link_down_delay_ms = 0; 3227 internals->link_up_delay_ms = 0; 3228 3229 internals->slave_count = 0; 3230 internals->active_slave_count = 0; 3231 internals->rx_offload_capa = 0; 3232 internals->tx_offload_capa = 0; 3233 internals->rx_queue_offload_capa = 0; 3234 internals->tx_queue_offload_capa = 0; 3235 internals->candidate_max_rx_pktlen = 0; 3236 internals->max_rx_pktlen = 0; 3237 3238 /* Initially allow to choose any offload type */ 3239 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK; 3240 3241 memset(&internals->default_rxconf, 0, 3242 sizeof(internals->default_rxconf)); 3243 memset(&internals->default_txconf, 0, 3244 sizeof(internals->default_txconf)); 3245 3246 memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim)); 3247 memset(&internals->tx_desc_lim, 
0, sizeof(internals->tx_desc_lim)); 3248 3249 memset(internals->active_slaves, 0, sizeof(internals->active_slaves)); 3250 memset(internals->slaves, 0, sizeof(internals->slaves)); 3251 3252 TAILQ_INIT(&internals->flow_list); 3253 internals->flow_isolated_valid = 0; 3254 3255 /* Set mode 4 default configuration */ 3256 bond_mode_8023ad_setup(eth_dev, NULL); 3257 if (bond_ethdev_mode_set(eth_dev, mode)) { 3258 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d", 3259 eth_dev->data->port_id, mode); 3260 goto err; 3261 } 3262 3263 vlan_filter_bmp_size = 3264 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1); 3265 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size, 3266 RTE_CACHE_LINE_SIZE); 3267 if (internals->vlan_filter_bmpmem == NULL) { 3268 RTE_BOND_LOG(ERR, 3269 "Failed to allocate vlan bitmap for bonded device %u", 3270 eth_dev->data->port_id); 3271 goto err; 3272 } 3273 3274 internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1, 3275 internals->vlan_filter_bmpmem, vlan_filter_bmp_size); 3276 if (internals->vlan_filter_bmp == NULL) { 3277 RTE_BOND_LOG(ERR, 3278 "Failed to init vlan bitmap for bonded device %u", 3279 eth_dev->data->port_id); 3280 rte_free(internals->vlan_filter_bmpmem); 3281 goto err; 3282 } 3283 3284 return eth_dev->data->port_id; 3285 3286 err: 3287 rte_free(internals); 3288 if (eth_dev != NULL) 3289 eth_dev->data->dev_private = NULL; 3290 rte_eth_dev_release_port(eth_dev); 3291 return -1; 3292 } 3293 3294 static int 3295 bond_probe(struct rte_vdev_device *dev) 3296 { 3297 const char *name; 3298 struct bond_dev_private *internals; 3299 struct rte_kvargs *kvlist; 3300 uint8_t bonding_mode, socket_id/*, agg_mode*/; 3301 int arg_count, port_id; 3302 uint8_t agg_mode; 3303 struct rte_eth_dev *eth_dev; 3304 3305 if (!dev) 3306 return -EINVAL; 3307 3308 name = rte_vdev_device_name(dev); 3309 RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name); 3310 3311 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 3312 eth_dev = rte_eth_dev_attach_secondary(name); 3313 if (!eth_dev) { 3314 RTE_BOND_LOG(ERR, "Failed to probe %s", name); 3315 return -1; 3316 } 3317 /* TODO: request info from primary to set up Rx and Tx */ 3318 eth_dev->dev_ops = &default_dev_ops; 3319 eth_dev->device = &dev->device; 3320 rte_eth_dev_probing_finish(eth_dev); 3321 return 0; 3322 } 3323 3324 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), 3325 pmd_bond_init_valid_arguments); 3326 if (kvlist == NULL) 3327 return -1; 3328 3329 /* Parse link bonding mode */ 3330 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) { 3331 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG, 3332 &bond_ethdev_parse_slave_mode_kvarg, 3333 &bonding_mode) != 0) { 3334 RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s", 3335 name); 3336 goto parse_error; 3337 } 3338 } else { 3339 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded " 3340 "device %s", name); 3341 goto parse_error; 3342 } 3343 3344 /* Parse socket id to create bonding device on */ 3345 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG); 3346 if (arg_count == 1) { 3347 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG, 3348 &bond_ethdev_parse_socket_id_kvarg, &socket_id) 3349 != 0) { 3350 RTE_BOND_LOG(ERR, "Invalid socket Id specified for " 3351 "bonded device %s", name); 3352 goto parse_error; 3353 } 3354 } else if (arg_count > 1) { 3355 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for " 3356 "bonded device %s", name); 3357 goto parse_error; 3358 } else { 3359 
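		/* No socket id supplied: default to the socket of the
		 * calling lcore. */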
		socket_id = rte_socket_id();
	}

	dev->device.numa_node = socket_id;

	/* Create link bonding eth device */
	port_id = bond_alloc(dev, bonding_mode);
	if (port_id < 0) {
		RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
				"socket %u.", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to parse agg selection mode for bonded device %s",
					name);
			goto parse_error;
		}

		if (internals->mode == BONDING_MODE_8023AD)
			internals->mode4.agg_selection = agg_mode;
	} else {
		internals->mode4.agg_selection = AGG_STABLE;
	}

	rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
	RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
			"socket %u.", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);

	return -1;
}

static int
bond_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;
	struct bond_dev_private *internals;
	const char *name;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);

	/* now free all data allocation - for eth_dev structure,
	 * dummy pci driver and internal (private) data
	 */

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return -ENODEV;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return rte_eth_dev_release_port(eth_dev);

	RTE_ASSERT(eth_dev->device == &dev->device);

	internals = eth_dev->data->dev_private;
	if (internals->slave_count != 0)
		return -EBUSY;

	if (eth_dev->data->dev_started == 1) {
		bond_ethdev_stop(eth_dev);
		bond_ethdev_close(eth_dev);
	}

	eth_dev->dev_ops = NULL;
	eth_dev->rx_pkt_burst = NULL;
	eth_dev->tx_pkt_burst = NULL;

	internals = eth_dev->data->dev_private;
	/* Try to release the mempool used in mode 6 (ALB). If the bonded
	 * device is not in mode 6, freeing the NULL pointer is harmless.
	 */
	rte_mempool_free(internals->mode6.mempool);
	rte_bitmap_free(internals->vlan_filter_bmp);
	rte_free(internals->vlan_filter_bmpmem);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}

/* This function resolves the slave port ids after all the other physical
 * and virtual devices have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	const char *name = dev->device->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint16_t port_id = dev - rte_eth_devices;
	uint8_t agg_mode;

	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned i, j;

	/*
	 * If RSS is enabled, fill the table with default values and set the
	 * key to the value specified in the port RSS configuration.
	 * Fall back to the default RSS key if no key is specified.
	 */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
			internals->rss_key_len =
				dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
			memcpy(internals->rss_key,
				dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
				internals->rss_key_len);
		} else {
			internals->rss_key_len = sizeof(default_rss_key);
			memcpy(internals->rss_key, default_rss_key,
				internals->rss_key_len);
		}

		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] =
					(i * RTE_RETA_GROUP_SIZE + j) %
					dev->data->nb_rx_queues;
		}
	}

	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

	/*
	 * If there is no kvlist, this bonded device was created through the
	 * bonding API.
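	 * Otherwise it was created from EAL devargs, e.g. (illustrative):
	 * --vdev 'net_bonding0,mode=1,slave=0000:02:00.0,slave=0000:03:00.0'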
3509 */ 3510 if (!kvlist) 3511 return 0; 3512 3513 /* Parse MAC address for bonded device */ 3514 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG); 3515 if (arg_count == 1) { 3516 struct rte_ether_addr bond_mac; 3517 3518 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG, 3519 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) { 3520 RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s", 3521 name); 3522 return -1; 3523 } 3524 3525 /* Set MAC address */ 3526 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) { 3527 RTE_BOND_LOG(ERR, 3528 "Failed to set mac address on bonded device %s", 3529 name); 3530 return -1; 3531 } 3532 } else if (arg_count > 1) { 3533 RTE_BOND_LOG(ERR, 3534 "MAC address can be specified only once for bonded device %s", 3535 name); 3536 return -1; 3537 } 3538 3539 /* Parse/set balance mode transmit policy */ 3540 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG); 3541 if (arg_count == 1) { 3542 uint8_t xmit_policy; 3543 3544 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG, 3545 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) != 3546 0) { 3547 RTE_BOND_LOG(INFO, 3548 "Invalid xmit policy specified for bonded device %s", 3549 name); 3550 return -1; 3551 } 3552 3553 /* Set balance mode transmit policy*/ 3554 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) { 3555 RTE_BOND_LOG(ERR, 3556 "Failed to set balance xmit policy on bonded device %s", 3557 name); 3558 return -1; 3559 } 3560 } else if (arg_count > 1) { 3561 RTE_BOND_LOG(ERR, 3562 "Transmit policy can be specified only once for bonded device %s", 3563 name); 3564 return -1; 3565 } 3566 3567 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) { 3568 if (rte_kvargs_process(kvlist, 3569 PMD_BOND_AGG_MODE_KVARG, 3570 &bond_ethdev_parse_slave_agg_mode_kvarg, 3571 &agg_mode) != 0) { 3572 RTE_BOND_LOG(ERR, 3573 "Failed to parse agg selection mode for bonded device %s", 3574 name); 3575 } 3576 if (internals->mode == BONDING_MODE_8023AD) { 3577 int ret = rte_eth_bond_8023ad_agg_selection_set(port_id, 3578 agg_mode); 3579 if (ret < 0) { 3580 RTE_BOND_LOG(ERR, 3581 "Invalid args for agg selection set for bonded device %s", 3582 name); 3583 return -1; 3584 } 3585 } 3586 } 3587 3588 /* Parse/add slave ports to bonded device */ 3589 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) { 3590 struct bond_ethdev_slave_ports slave_ports; 3591 unsigned i; 3592 3593 memset(&slave_ports, 0, sizeof(slave_ports)); 3594 3595 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG, 3596 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) { 3597 RTE_BOND_LOG(ERR, 3598 "Failed to parse slave ports for bonded device %s", 3599 name); 3600 return -1; 3601 } 3602 3603 for (i = 0; i < slave_ports.slave_count; i++) { 3604 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) { 3605 RTE_BOND_LOG(ERR, 3606 "Failed to add port %d as slave to bonded device %s", 3607 slave_ports.slaves[i], name); 3608 } 3609 } 3610 3611 } else { 3612 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name); 3613 return -1; 3614 } 3615 3616 /* Parse/set primary slave port id*/ 3617 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG); 3618 if (arg_count == 1) { 3619 uint16_t primary_slave_port_id; 3620 3621 if (rte_kvargs_process(kvlist, 3622 PMD_BOND_PRIMARY_SLAVE_KVARG, 3623 &bond_ethdev_parse_primary_slave_port_id_kvarg, 3624 &primary_slave_port_id) < 0) { 3625 RTE_BOND_LOG(INFO, 3626 "Invalid primary slave port id 
specified for bonded device %s",
				name);
			return -1;
		}

		/* Set the primary slave port id */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set primary slave port %d on bonded device %s",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
				"Primary slave can be specified only once for bonded device %s",
				name);
		return -1;
	}

	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_BOND_LOG(INFO,
				"Invalid lsc polling interval value specified for bonded"
				" device %s", name);
			return -1;
		}

		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
				"Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
				lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
			"LSC polling interval can be specified only once for bonded"
			" device %s", name);
		return -1;
	}

	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_BOND_LOG(INFO,
				"Invalid link up propagation delay value specified for"
				" bonded device %s", name);
			return -1;
		}

		/* Set link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
				"Failed to set link up propagation delay (%u ms) on bonded"
				" device %s", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
			"Link up propagation delay can be specified only once for"
			" bonded device %s", name);
		return -1;
	}

	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_BOND_LOG(INFO,
				"Invalid link down propagation delay value specified for"
				" bonded device %s", name);
			return -1;
		}

		/* Set link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
				"Failed to set link down propagation delay (%u ms) on bonded device %s",
				link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
			"Link down propagation delay can be specified only once for bonded device %s",
			name);
		return -1;
	}

	return 0;
}

struct rte_vdev_driver pmd_bond_drv = {
	.probe = bond_probe,
	.remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
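/* Keep the legacy "eth_bond" vdev name working as an alias of "net_bonding". */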
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond); 3745 3746 RTE_PMD_REGISTER_PARAM_STRING(net_bonding, 3747 "slave=<ifc> " 3748 "primary=<ifc> " 3749 "mode=[0-6] " 3750 "xmit_policy=[l2 | l23 | l34] " 3751 "agg_mode=[count | stable | bandwidth] " 3752 "socket_id=<int> " 3753 "mac=<mac addr> " 3754 "lsc_poll_period_ms=<int> " 3755 "up_delay=<int> " 3756 "down_delay=<int>"); 3757 3758 RTE_LOG_REGISTER(bond_logtype, pmd.net.bond, NOTICE); 3759
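
/*
 * Illustrative usage (not part of the driver): with the parameters registered
 * above, a bonded device can be created from the EAL command line, e.g.
 * (PCI addresses and the application name are hypothetical):
 *
 *   testpmd --vdev 'net_bonding0,mode=2,slave=0000:02:00.0,\
 *       slave=0000:03:00.0,xmit_policy=l34,socket_id=0' -- -i
 *
 * or programmatically through rte_eth_bond_create() and
 * rte_eth_bond_slave_add().
 */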