1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2017 Intel Corporation 3 */ 4 #include <stdlib.h> 5 #include <stdbool.h> 6 #include <netinet/in.h> 7 8 #include <rte_mbuf.h> 9 #include <rte_malloc.h> 10 #include <ethdev_driver.h> 11 #include <ethdev_vdev.h> 12 #include <rte_tcp.h> 13 #include <rte_udp.h> 14 #include <rte_ip.h> 15 #include <rte_ip_frag.h> 16 #include <rte_devargs.h> 17 #include <rte_kvargs.h> 18 #include <rte_bus_vdev.h> 19 #include <rte_alarm.h> 20 #include <rte_cycles.h> 21 #include <rte_string_fns.h> 22 23 #include "rte_eth_bond.h" 24 #include "eth_bond_private.h" 25 #include "eth_bond_8023ad_private.h" 26 27 #define REORDER_PERIOD_MS 10 28 #define DEFAULT_POLLING_INTERVAL_10_MS (10) 29 #define BOND_MAX_MAC_ADDRS 16 30 31 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port) 32 33 /* Table for statistics in mode 5 TLB */ 34 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS]; 35 36 static inline size_t 37 get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto) 38 { 39 size_t vlan_offset = 0; 40 41 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto || 42 rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) { 43 struct rte_vlan_hdr *vlan_hdr = 44 (struct rte_vlan_hdr *)(eth_hdr + 1); 45 46 vlan_offset = sizeof(struct rte_vlan_hdr); 47 *proto = vlan_hdr->eth_proto; 48 49 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) { 50 vlan_hdr = vlan_hdr + 1; 51 *proto = vlan_hdr->eth_proto; 52 vlan_offset += sizeof(struct rte_vlan_hdr); 53 } 54 } 55 return vlan_offset; 56 } 57 58 static uint16_t 59 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 60 { 61 struct bond_dev_private *internals; 62 63 uint16_t num_rx_total = 0; 64 uint16_t slave_count; 65 uint16_t active_slave; 66 int i; 67 68 /* Cast to structure, containing bonded device's port id and queue id */ 69 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; 70 internals = bd_rx_q->dev_private; 71 slave_count = internals->active_slave_count; 72 active_slave = bd_rx_q->active_slave; 73 74 for (i = 0; i < slave_count && nb_pkts; i++) { 75 uint16_t num_rx_slave; 76 77 /* Offset of pointer to *bufs increases as packets are received 78 * from other slaves */ 79 num_rx_slave = 80 rte_eth_rx_burst(internals->active_slaves[active_slave], 81 bd_rx_q->queue_id, 82 bufs + num_rx_total, nb_pkts); 83 num_rx_total += num_rx_slave; 84 nb_pkts -= num_rx_slave; 85 if (++active_slave == slave_count) 86 active_slave = 0; 87 } 88 89 if (++bd_rx_q->active_slave >= slave_count) 90 bd_rx_q->active_slave = 0; 91 return num_rx_total; 92 } 93 94 static uint16_t 95 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs, 96 uint16_t nb_pkts) 97 { 98 struct bond_dev_private *internals; 99 100 /* Cast to structure, containing bonded device's port id and queue id */ 101 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; 102 103 internals = bd_rx_q->dev_private; 104 105 return rte_eth_rx_burst(internals->current_primary_port, 106 bd_rx_q->queue_id, bufs, nb_pkts); 107 } 108 109 static inline uint8_t 110 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf) 111 { 112 const uint16_t ether_type_slow_be = 113 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW); 114 115 return !((mbuf->ol_flags & PKT_RX_VLAN) ? 
mbuf->vlan_tci : 0) && 116 (ethertype == ether_type_slow_be && 117 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP)); 118 } 119 120 /***************************************************************************** 121 * Flow director's setup for mode 4 optimization 122 */ 123 124 static struct rte_flow_item_eth flow_item_eth_type_8023ad = { 125 .dst.addr_bytes = { 0 }, 126 .src.addr_bytes = { 0 }, 127 .type = RTE_BE16(RTE_ETHER_TYPE_SLOW), 128 }; 129 130 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = { 131 .dst.addr_bytes = { 0 }, 132 .src.addr_bytes = { 0 }, 133 .type = 0xFFFF, 134 }; 135 136 static struct rte_flow_item flow_item_8023ad[] = { 137 { 138 .type = RTE_FLOW_ITEM_TYPE_ETH, 139 .spec = &flow_item_eth_type_8023ad, 140 .last = NULL, 141 .mask = &flow_item_eth_mask_type_8023ad, 142 }, 143 { 144 .type = RTE_FLOW_ITEM_TYPE_END, 145 .spec = NULL, 146 .last = NULL, 147 .mask = NULL, 148 } 149 }; 150 151 const struct rte_flow_attr flow_attr_8023ad = { 152 .group = 0, 153 .priority = 0, 154 .ingress = 1, 155 .egress = 0, 156 .reserved = 0, 157 }; 158 159 int 160 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev, 161 uint16_t slave_port) { 162 struct rte_eth_dev_info slave_info; 163 struct rte_flow_error error; 164 struct bond_dev_private *internals = bond_dev->data->dev_private; 165 166 const struct rte_flow_action_queue lacp_queue_conf = { 167 .index = 0, 168 }; 169 170 const struct rte_flow_action actions[] = { 171 { 172 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 173 .conf = &lacp_queue_conf 174 }, 175 { 176 .type = RTE_FLOW_ACTION_TYPE_END, 177 } 178 }; 179 180 int ret = rte_flow_validate(slave_port, &flow_attr_8023ad, 181 flow_item_8023ad, actions, &error); 182 if (ret < 0) { 183 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)", 184 __func__, error.message, slave_port, 185 internals->mode4.dedicated_queues.rx_qid); 186 return -1; 187 } 188 189 ret = rte_eth_dev_info_get(slave_port, &slave_info); 190 if (ret != 0) { 191 RTE_BOND_LOG(ERR, 192 "%s: Error during getting device (port %u) info: %s\n", 193 __func__, slave_port, strerror(-ret)); 194 195 return ret; 196 } 197 198 if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues || 199 slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) { 200 RTE_BOND_LOG(ERR, 201 "%s: Slave %d capabilities doesn't allow to allocate additional queues", 202 __func__, slave_port); 203 return -1; 204 } 205 206 return 0; 207 } 208 209 int 210 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) { 211 struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id]; 212 struct bond_dev_private *internals = bond_dev->data->dev_private; 213 struct rte_eth_dev_info bond_info; 214 uint16_t idx; 215 int ret; 216 217 /* Verify if all slaves in bonding supports flow director and */ 218 if (internals->slave_count > 0) { 219 ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info); 220 if (ret != 0) { 221 RTE_BOND_LOG(ERR, 222 "%s: Error during getting device (port %u) info: %s\n", 223 __func__, bond_dev->data->port_id, 224 strerror(-ret)); 225 226 return ret; 227 } 228 229 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues; 230 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues; 231 232 for (idx = 0; idx < internals->slave_count; idx++) { 233 if (bond_ethdev_8023ad_flow_verify(bond_dev, 234 internals->slaves[idx].port_id) != 0) 235 return -1; 236 } 237 } 238 239 return 0; 240 } 241 242 int 243 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) { 244 245 
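	/*
	 * Editor's note (descriptive comment, not upstream): this function installs the
	 * rte_flow rule that steers IEEE 802.3ad slow protocol frames (LACPDUs and
	 * marker PDUs, ether type 0x8809) received on this slave port to the dedicated
	 * LACP Rx queue used by mode 4.
	 */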
struct rte_flow_error error; 246 struct bond_dev_private *internals = bond_dev->data->dev_private; 247 struct rte_flow_action_queue lacp_queue_conf = { 248 .index = internals->mode4.dedicated_queues.rx_qid, 249 }; 250 251 const struct rte_flow_action actions[] = { 252 { 253 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 254 .conf = &lacp_queue_conf 255 }, 256 { 257 .type = RTE_FLOW_ACTION_TYPE_END, 258 } 259 }; 260 261 internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port, 262 &flow_attr_8023ad, flow_item_8023ad, actions, &error); 263 if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) { 264 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s " 265 "(slave_port=%d queue_id=%d)", 266 error.message, slave_port, 267 internals->mode4.dedicated_queues.rx_qid); 268 return -1; 269 } 270 271 return 0; 272 } 273 274 static inline uint16_t 275 rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, 276 bool dedicated_rxq) 277 { 278 /* Cast to structure, containing bonded device's port id and queue id */ 279 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; 280 struct bond_dev_private *internals = bd_rx_q->dev_private; 281 struct rte_eth_dev *bonded_eth_dev = 282 &rte_eth_devices[internals->port_id]; 283 struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs; 284 struct rte_ether_hdr *hdr; 285 286 const uint16_t ether_type_slow_be = 287 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW); 288 uint16_t num_rx_total = 0; /* Total number of received packets */ 289 uint16_t slaves[RTE_MAX_ETHPORTS]; 290 uint16_t slave_count, idx; 291 292 uint8_t collecting; /* current slave collecting status */ 293 const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id); 294 const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id); 295 uint8_t subtype; 296 uint16_t i; 297 uint16_t j; 298 uint16_t k; 299 300 /* Copy slave list to protect against slave up/down changes during tx 301 * bursting */ 302 slave_count = internals->active_slave_count; 303 memcpy(slaves, internals->active_slaves, 304 sizeof(internals->active_slaves[0]) * slave_count); 305 306 idx = bd_rx_q->active_slave; 307 if (idx >= slave_count) { 308 bd_rx_q->active_slave = 0; 309 idx = 0; 310 } 311 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) { 312 j = num_rx_total; 313 collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]], 314 COLLECTING); 315 316 /* Read packets from this slave */ 317 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id, 318 &bufs[num_rx_total], nb_pkts - num_rx_total); 319 320 for (k = j; k < 2 && k < num_rx_total; k++) 321 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *)); 322 323 /* Handle slow protocol packets. 
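	 * Editor's note: LACPDUs and marker PDUs must not reach the application; when no
	 * dedicated Rx queue is configured they are filtered out of the burst below and
	 * handed to the mode 4 state machine instead.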
*/ 324 while (j < num_rx_total) { 325 if (j + 3 < num_rx_total) 326 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); 327 328 hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *); 329 subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype; 330 331 /* Remove packet from array if: 332 * - it is slow packet but no dedicated rxq is present, 333 * - slave is not in collecting state, 334 * - bonding interface is not in promiscuous mode: 335 * - packet is unicast and address does not match, 336 * - packet is multicast and bonding interface 337 * is not in allmulti, 338 */ 339 if (unlikely( 340 (!dedicated_rxq && 341 is_lacp_packets(hdr->ether_type, subtype, 342 bufs[j])) || 343 !collecting || 344 (!promisc && 345 ((rte_is_unicast_ether_addr(&hdr->d_addr) && 346 !rte_is_same_ether_addr(bond_mac, 347 &hdr->d_addr)) || 348 (!allmulti && 349 rte_is_multicast_ether_addr(&hdr->d_addr)))))) { 350 351 if (hdr->ether_type == ether_type_slow_be) { 352 bond_mode_8023ad_handle_slow_pkt( 353 internals, slaves[idx], bufs[j]); 354 } else 355 rte_pktmbuf_free(bufs[j]); 356 357 /* Packet is managed by mode 4 or dropped, shift the array */ 358 num_rx_total--; 359 if (j < num_rx_total) { 360 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) * 361 (num_rx_total - j)); 362 } 363 } else 364 j++; 365 } 366 if (unlikely(++idx == slave_count)) 367 idx = 0; 368 } 369 370 if (++bd_rx_q->active_slave >= slave_count) 371 bd_rx_q->active_slave = 0; 372 373 return num_rx_total; 374 } 375 376 static uint16_t 377 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, 378 uint16_t nb_pkts) 379 { 380 return rx_burst_8023ad(queue, bufs, nb_pkts, false); 381 } 382 383 static uint16_t 384 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, 385 uint16_t nb_pkts) 386 { 387 return rx_burst_8023ad(queue, bufs, nb_pkts, true); 388 } 389 390 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) 391 uint32_t burstnumberRX; 392 uint32_t burstnumberTX; 393 394 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB 395 396 static void 397 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len) 398 { 399 switch (arp_op) { 400 case RTE_ARP_OP_REQUEST: 401 strlcpy(buf, "ARP Request", buf_len); 402 return; 403 case RTE_ARP_OP_REPLY: 404 strlcpy(buf, "ARP Reply", buf_len); 405 return; 406 case RTE_ARP_OP_REVREQUEST: 407 strlcpy(buf, "Reverse ARP Request", buf_len); 408 return; 409 case RTE_ARP_OP_REVREPLY: 410 strlcpy(buf, "Reverse ARP Reply", buf_len); 411 return; 412 case RTE_ARP_OP_INVREQUEST: 413 strlcpy(buf, "Peer Identify Request", buf_len); 414 return; 415 case RTE_ARP_OP_INVREPLY: 416 strlcpy(buf, "Peer Identify Reply", buf_len); 417 return; 418 default: 419 break; 420 } 421 strlcpy(buf, "Unknown", buf_len); 422 return; 423 } 424 #endif 425 #define MaxIPv4String 16 426 static void 427 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size) 428 { 429 uint32_t ipv4_addr; 430 431 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr); 432 snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF, 433 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF, 434 ipv4_addr & 0xFF); 435 } 436 437 #define MAX_CLIENTS_NUMBER 128 438 uint8_t active_clients; 439 struct client_stats_t { 440 uint16_t port; 441 uint32_t ipv4_addr; 442 uint32_t ipv4_rx_packets; 443 uint32_t ipv4_tx_packets; 444 }; 445 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER]; 446 447 static void 448 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator) 449 { 450 int i = 0; 451 452 for (; i < 
MAX_CLIENTS_NUMBER; i++) { 453 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) { 454 /* Just update RX packets number for this client */ 455 if (TXorRXindicator == &burstnumberRX) 456 client_stats[i].ipv4_rx_packets++; 457 else 458 client_stats[i].ipv4_tx_packets++; 459 return; 460 } 461 } 462 /* We have a new client. Insert him to the table, and increment stats */ 463 if (TXorRXindicator == &burstnumberRX) 464 client_stats[active_clients].ipv4_rx_packets++; 465 else 466 client_stats[active_clients].ipv4_tx_packets++; 467 client_stats[active_clients].ipv4_addr = addr; 468 client_stats[active_clients].port = port; 469 active_clients++; 470 471 } 472 473 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB 474 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \ 475 rte_log(RTE_LOG_DEBUG, bond_logtype, \ 476 "%s port:%d SrcMAC:" RTE_ETHER_ADDR_PRT_FMT " SrcIP:%s " \ 477 "DstMAC:" RTE_ETHER_ADDR_PRT_FMT " DstIP:%s %s %d\n", \ 478 info, \ 479 port, \ 480 RTE_ETHER_ADDR_BYTES(&eth_h->s_addr), \ 481 src_ip, \ 482 RTE_ETHER_ADDR_BYTES(&eth_h->d_addr), \ 483 dst_ip, \ 484 arp_op, ++burstnumber) 485 #endif 486 487 static void 488 mode6_debug(const char __rte_unused *info, 489 struct rte_ether_hdr *eth_h, uint16_t port, 490 uint32_t __rte_unused *burstnumber) 491 { 492 struct rte_ipv4_hdr *ipv4_h; 493 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB 494 struct rte_arp_hdr *arp_h; 495 char dst_ip[16]; 496 char ArpOp[24]; 497 char buf[16]; 498 #endif 499 char src_ip[16]; 500 501 uint16_t ether_type = eth_h->ether_type; 502 uint16_t offset = get_vlan_offset(eth_h, &ether_type); 503 504 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB 505 strlcpy(buf, info, 16); 506 #endif 507 508 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) { 509 ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset); 510 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String); 511 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB 512 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String); 513 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber); 514 #endif 515 update_client_stats(ipv4_h->src_addr, port, burstnumber); 516 } 517 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB 518 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) { 519 arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset); 520 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String); 521 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String); 522 arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode), 523 ArpOp, sizeof(ArpOp)); 524 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber); 525 } 526 #endif 527 } 528 #endif 529 530 static uint16_t 531 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 532 { 533 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; 534 struct bond_dev_private *internals = bd_rx_q->dev_private; 535 struct rte_ether_hdr *eth_h; 536 uint16_t ether_type, offset; 537 uint16_t nb_recv_pkts; 538 int i; 539 540 nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts); 541 542 for (i = 0; i < nb_recv_pkts; i++) { 543 eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *); 544 ether_type = eth_h->ether_type; 545 offset = get_vlan_offset(eth_h, &ether_type); 546 547 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) { 548 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) 549 mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX); 550 #endif 551 bond_mode_alb_arp_recv(eth_h, offset, internals); 552 } 553 #if
defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) 554 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) 555 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX); 556 #endif 557 } 558 559 return nb_recv_pkts; 560 } 561 562 static uint16_t 563 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs, 564 uint16_t nb_pkts) 565 { 566 struct bond_dev_private *internals; 567 struct bond_tx_queue *bd_tx_q; 568 569 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; 570 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; 571 572 uint16_t num_of_slaves; 573 uint16_t slaves[RTE_MAX_ETHPORTS]; 574 575 uint16_t num_tx_total = 0, num_tx_slave; 576 577 static int slave_idx = 0; 578 int i, cslave_idx = 0, tx_fail_total = 0; 579 580 bd_tx_q = (struct bond_tx_queue *)queue; 581 internals = bd_tx_q->dev_private; 582 583 /* Copy slave list to protect against slave up/down changes during tx 584 * bursting */ 585 num_of_slaves = internals->active_slave_count; 586 memcpy(slaves, internals->active_slaves, 587 sizeof(internals->active_slaves[0]) * num_of_slaves); 588 589 if (num_of_slaves < 1) 590 return num_tx_total; 591 592 /* Populate slaves mbuf with which packets are to be sent on it */ 593 for (i = 0; i < nb_pkts; i++) { 594 cslave_idx = (slave_idx + i) % num_of_slaves; 595 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i]; 596 } 597 598 /* increment current slave index so the next call to tx burst starts on the 599 * next slave */ 600 slave_idx = ++cslave_idx; 601 602 /* Send packet burst on each slave device */ 603 for (i = 0; i < num_of_slaves; i++) { 604 if (slave_nb_pkts[i] > 0) { 605 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, 606 slave_bufs[i], slave_nb_pkts[i]); 607 608 /* if tx burst fails move packets to end of bufs */ 609 if (unlikely(num_tx_slave < slave_nb_pkts[i])) { 610 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave; 611 612 tx_fail_total += tx_fail_slave; 613 614 memcpy(&bufs[nb_pkts - tx_fail_total], 615 &slave_bufs[i][num_tx_slave], 616 tx_fail_slave * sizeof(bufs[0])); 617 } 618 num_tx_total += num_tx_slave; 619 } 620 } 621 622 return num_tx_total; 623 } 624 625 static uint16_t 626 bond_ethdev_tx_burst_active_backup(void *queue, 627 struct rte_mbuf **bufs, uint16_t nb_pkts) 628 { 629 struct bond_dev_private *internals; 630 struct bond_tx_queue *bd_tx_q; 631 632 bd_tx_q = (struct bond_tx_queue *)queue; 633 internals = bd_tx_q->dev_private; 634 635 if (internals->active_slave_count < 1) 636 return 0; 637 638 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id, 639 bufs, nb_pkts); 640 } 641 642 static inline uint16_t 643 ether_hash(struct rte_ether_hdr *eth_hdr) 644 { 645 unaligned_uint16_t *word_src_addr = 646 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes; 647 unaligned_uint16_t *word_dst_addr = 648 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes; 649 650 return (word_src_addr[0] ^ word_dst_addr[0]) ^ 651 (word_src_addr[1] ^ word_dst_addr[1]) ^ 652 (word_src_addr[2] ^ word_dst_addr[2]); 653 } 654 655 static inline uint32_t 656 ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr) 657 { 658 return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr; 659 } 660 661 static inline uint32_t 662 ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr) 663 { 664 unaligned_uint32_t *word_src_addr = 665 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]); 666 unaligned_uint32_t *word_dst_addr = 667 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]); 668 669 return (word_src_addr[0] ^ word_dst_addr[0]) ^ 670 (word_src_addr[1] ^ 
word_dst_addr[1]) ^ 671 (word_src_addr[2] ^ word_dst_addr[2]) ^ 672 (word_src_addr[3] ^ word_dst_addr[3]); 673 } 674 675 676 void 677 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts, 678 uint16_t slave_count, uint16_t *slaves) 679 { 680 struct rte_ether_hdr *eth_hdr; 681 uint32_t hash; 682 int i; 683 684 for (i = 0; i < nb_pkts; i++) { 685 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *); 686 687 hash = ether_hash(eth_hdr); 688 689 slaves[i] = (hash ^= hash >> 8) % slave_count; 690 } 691 } 692 693 void 694 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts, 695 uint16_t slave_count, uint16_t *slaves) 696 { 697 uint16_t i; 698 struct rte_ether_hdr *eth_hdr; 699 uint16_t proto; 700 size_t vlan_offset; 701 uint32_t hash, l3hash; 702 703 for (i = 0; i < nb_pkts; i++) { 704 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *); 705 l3hash = 0; 706 707 proto = eth_hdr->ether_type; 708 hash = ether_hash(eth_hdr); 709 710 vlan_offset = get_vlan_offset(eth_hdr, &proto); 711 712 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) { 713 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *) 714 ((char *)(eth_hdr + 1) + vlan_offset); 715 l3hash = ipv4_hash(ipv4_hdr); 716 717 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) { 718 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *) 719 ((char *)(eth_hdr + 1) + vlan_offset); 720 l3hash = ipv6_hash(ipv6_hdr); 721 } 722 723 hash = hash ^ l3hash; 724 hash ^= hash >> 16; 725 hash ^= hash >> 8; 726 727 slaves[i] = hash % slave_count; 728 } 729 } 730 731 void 732 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts, 733 uint16_t slave_count, uint16_t *slaves) 734 { 735 struct rte_ether_hdr *eth_hdr; 736 uint16_t proto; 737 size_t vlan_offset; 738 int i; 739 740 struct rte_udp_hdr *udp_hdr; 741 struct rte_tcp_hdr *tcp_hdr; 742 uint32_t hash, l3hash, l4hash; 743 744 for (i = 0; i < nb_pkts; i++) { 745 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *); 746 size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]); 747 proto = eth_hdr->ether_type; 748 vlan_offset = get_vlan_offset(eth_hdr, &proto); 749 l3hash = 0; 750 l4hash = 0; 751 752 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) { 753 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *) 754 ((char *)(eth_hdr + 1) + vlan_offset); 755 size_t ip_hdr_offset; 756 757 l3hash = ipv4_hash(ipv4_hdr); 758 759 /* there is no L4 header in fragmented packet */ 760 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) 761 == 0)) { 762 ip_hdr_offset = (ipv4_hdr->version_ihl 763 & RTE_IPV4_HDR_IHL_MASK) * 764 RTE_IPV4_IHL_MULTIPLIER; 765 766 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) { 767 tcp_hdr = (struct rte_tcp_hdr *) 768 ((char *)ipv4_hdr + 769 ip_hdr_offset); 770 if ((size_t)tcp_hdr + sizeof(*tcp_hdr) 771 < pkt_end) 772 l4hash = HASH_L4_PORTS(tcp_hdr); 773 } else if (ipv4_hdr->next_proto_id == 774 IPPROTO_UDP) { 775 udp_hdr = (struct rte_udp_hdr *) 776 ((char *)ipv4_hdr + 777 ip_hdr_offset); 778 if ((size_t)udp_hdr + sizeof(*udp_hdr) 779 < pkt_end) 780 l4hash = HASH_L4_PORTS(udp_hdr); 781 } 782 } 783 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) { 784 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *) 785 ((char *)(eth_hdr + 1) + vlan_offset); 786 l3hash = ipv6_hash(ipv6_hdr); 787 788 if (ipv6_hdr->proto == IPPROTO_TCP) { 789 tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1); 790 l4hash = HASH_L4_PORTS(tcp_hdr); 791 } else if (ipv6_hdr->proto == IPPROTO_UDP) { 792 udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1); 793 l4hash = 
HASH_L4_PORTS(udp_hdr); 794 } 795 } 796 797 hash = l3hash ^ l4hash; 798 hash ^= hash >> 16; 799 hash ^= hash >> 8; 800 801 slaves[i] = hash % slave_count; 802 } 803 } 804 805 struct bwg_slave { 806 uint64_t bwg_left_int; 807 uint64_t bwg_left_remainder; 808 uint16_t slave; 809 }; 810 811 void 812 bond_tlb_activate_slave(struct bond_dev_private *internals) { 813 int i; 814 815 for (i = 0; i < internals->active_slave_count; i++) { 816 tlb_last_obytets[internals->active_slaves[i]] = 0; 817 } 818 } 819 820 static int 821 bandwidth_cmp(const void *a, const void *b) 822 { 823 const struct bwg_slave *bwg_a = a; 824 const struct bwg_slave *bwg_b = b; 825 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int; 826 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder - 827 (int64_t)bwg_a->bwg_left_remainder; 828 if (diff > 0) 829 return 1; 830 else if (diff < 0) 831 return -1; 832 else if (diff2 > 0) 833 return 1; 834 else if (diff2 < 0) 835 return -1; 836 else 837 return 0; 838 } 839 840 static void 841 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx, 842 struct bwg_slave *bwg_slave) 843 { 844 struct rte_eth_link link_status; 845 int ret; 846 847 ret = rte_eth_link_get_nowait(port_id, &link_status); 848 if (ret < 0) { 849 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s", 850 port_id, rte_strerror(-ret)); 851 return; 852 } 853 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8; 854 if (link_bwg == 0) 855 return; 856 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS; 857 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg; 858 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg; 859 } 860 861 static void 862 bond_ethdev_update_tlb_slave_cb(void *arg) 863 { 864 struct bond_dev_private *internals = arg; 865 struct rte_eth_stats slave_stats; 866 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS]; 867 uint16_t slave_count; 868 uint64_t tx_bytes; 869 870 uint8_t update_stats = 0; 871 uint16_t slave_id; 872 uint16_t i; 873 874 internals->slave_update_idx++; 875 876 877 if (internals->slave_update_idx >= REORDER_PERIOD_MS) 878 update_stats = 1; 879 880 for (i = 0; i < internals->active_slave_count; i++) { 881 slave_id = internals->active_slaves[i]; 882 rte_eth_stats_get(slave_id, &slave_stats); 883 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id]; 884 bandwidth_left(slave_id, tx_bytes, 885 internals->slave_update_idx, &bwg_array[i]); 886 bwg_array[i].slave = slave_id; 887 888 if (update_stats) { 889 tlb_last_obytets[slave_id] = slave_stats.obytes; 890 } 891 } 892 893 if (update_stats == 1) 894 internals->slave_update_idx = 0; 895 896 slave_count = i; 897 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp); 898 for (i = 0; i < slave_count; i++) 899 internals->tlb_slaves_order[i] = bwg_array[i].slave; 900 901 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb, 902 (struct bond_dev_private *)internals); 903 } 904 905 static uint16_t 906 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 907 { 908 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 909 struct bond_dev_private *internals = bd_tx_q->dev_private; 910 911 struct rte_eth_dev *primary_port = 912 &rte_eth_devices[internals->primary_port]; 913 uint16_t num_tx_total = 0; 914 uint16_t i, j; 915 916 uint16_t num_of_slaves = internals->active_slave_count; 917 uint16_t slaves[RTE_MAX_ETHPORTS]; 918 919 struct rte_ether_hdr *ether_hdr; 920 struct rte_ether_addr primary_slave_addr; 921 struct 
rte_ether_addr active_slave_addr; 922 923 if (num_of_slaves < 1) 924 return num_tx_total; 925 926 memcpy(slaves, internals->tlb_slaves_order, 927 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves); 928 929 930 rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr); 931 932 if (nb_pkts > 3) { 933 for (i = 0; i < 3; i++) 934 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*)); 935 } 936 937 for (i = 0; i < num_of_slaves; i++) { 938 rte_eth_macaddr_get(slaves[i], &active_slave_addr); 939 for (j = num_tx_total; j < nb_pkts; j++) { 940 if (j + 3 < nb_pkts) 941 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*)); 942 943 ether_hdr = rte_pktmbuf_mtod(bufs[j], 944 struct rte_ether_hdr *); 945 if (rte_is_same_ether_addr(&ether_hdr->s_addr, 946 &primary_slave_addr)) 947 rte_ether_addr_copy(&active_slave_addr, 948 &ether_hdr->s_addr); 949 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) 950 mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX); 951 #endif 952 } 953 954 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, 955 bufs + num_tx_total, nb_pkts - num_tx_total); 956 957 if (num_tx_total == nb_pkts) 958 break; 959 } 960 961 return num_tx_total; 962 } 963 964 void 965 bond_tlb_disable(struct bond_dev_private *internals) 966 { 967 rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals); 968 } 969 970 void 971 bond_tlb_enable(struct bond_dev_private *internals) 972 { 973 bond_ethdev_update_tlb_slave_cb(internals); 974 } 975 976 static uint16_t 977 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 978 { 979 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 980 struct bond_dev_private *internals = bd_tx_q->dev_private; 981 982 struct rte_ether_hdr *eth_h; 983 uint16_t ether_type, offset; 984 985 struct client_data *client_info; 986 987 /* 988 * We create transmit buffers for every slave and one additional to send 989 * through tlb. In worst case every packet will be send on one port. 990 */ 991 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts]; 992 uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 }; 993 994 /* 995 * We create separate transmit buffers for update packets as they won't 996 * be counted in num_tx_total.
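	 * Editor's note: at most one ARP update is generated per client table entry,
	 * hence the ALB_HASH_TABLE_SIZE dimension of update_bufs below.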
997 */ 998 struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE]; 999 uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 }; 1000 1001 struct rte_mbuf *upd_pkt; 1002 size_t pkt_size; 1003 1004 uint16_t num_send, num_not_send = 0; 1005 uint16_t num_tx_total = 0; 1006 uint16_t slave_idx; 1007 1008 int i, j; 1009 1010 /* Search tx buffer for ARP packets and forward them to alb */ 1011 for (i = 0; i < nb_pkts; i++) { 1012 eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *); 1013 ether_type = eth_h->ether_type; 1014 offset = get_vlan_offset(eth_h, &ether_type); 1015 1016 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) { 1017 slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals); 1018 1019 /* Change src mac in eth header */ 1020 rte_eth_macaddr_get(slave_idx, &eth_h->s_addr); 1021 1022 /* Add packet to slave tx buffer */ 1023 slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i]; 1024 slave_bufs_pkts[slave_idx]++; 1025 } else { 1026 /* If packet is not ARP, send it with TLB policy */ 1027 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] = 1028 bufs[i]; 1029 slave_bufs_pkts[RTE_MAX_ETHPORTS]++; 1030 } 1031 } 1032 1033 /* Update connected client ARP tables */ 1034 if (internals->mode6.ntt) { 1035 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) { 1036 client_info = &internals->mode6.client_table[i]; 1037 1038 if (client_info->in_use) { 1039 /* Allocate new packet to send ARP update on current slave */ 1040 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool); 1041 if (upd_pkt == NULL) { 1042 RTE_BOND_LOG(ERR, 1043 "Failed to allocate ARP packet from pool"); 1044 continue; 1045 } 1046 pkt_size = sizeof(struct rte_ether_hdr) + 1047 sizeof(struct rte_arp_hdr) + 1048 client_info->vlan_count * 1049 sizeof(struct rte_vlan_hdr); 1050 upd_pkt->data_len = pkt_size; 1051 upd_pkt->pkt_len = pkt_size; 1052 1053 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt, 1054 internals); 1055 1056 /* Add packet to update tx buffer */ 1057 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt; 1058 update_bufs_pkts[slave_idx]++; 1059 } 1060 } 1061 internals->mode6.ntt = 0; 1062 } 1063 1064 /* Send ARP packets on proper slaves */ 1065 for (i = 0; i < RTE_MAX_ETHPORTS; i++) { 1066 if (slave_bufs_pkts[i] > 0) { 1067 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, 1068 slave_bufs[i], slave_bufs_pkts[i]); 1069 for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) { 1070 bufs[nb_pkts - 1 - num_not_send - j] = 1071 slave_bufs[i][nb_pkts - 1 - j]; 1072 } 1073 1074 num_tx_total += num_send; 1075 num_not_send += slave_bufs_pkts[i] - num_send; 1076 1077 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) 1078 /* Print TX stats including update packets */ 1079 for (j = 0; j < slave_bufs_pkts[i]; j++) { 1080 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], 1081 struct rte_ether_hdr *); 1082 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX); 1083 } 1084 #endif 1085 } 1086 } 1087 1088 /* Send update packets on proper slaves */ 1089 for (i = 0; i < RTE_MAX_ETHPORTS; i++) { 1090 if (update_bufs_pkts[i] > 0) { 1091 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i], 1092 update_bufs_pkts[i]); 1093 for (j = num_send; j < update_bufs_pkts[i]; j++) { 1094 rte_pktmbuf_free(update_bufs[i][j]); 1095 } 1096 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) 1097 for (j = 0; j < update_bufs_pkts[i]; j++) { 1098 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], 1099 struct rte_ether_hdr *); 1100 mode6_debug("TX ARPupd:", eth_h, i,
&burstnumberTX); 1101 } 1102 #endif 1103 } 1104 } 1105 1106 /* Send non-ARP packets using tlb policy */ 1107 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) { 1108 num_send = bond_ethdev_tx_burst_tlb(queue, 1109 slave_bufs[RTE_MAX_ETHPORTS], 1110 slave_bufs_pkts[RTE_MAX_ETHPORTS]); 1111 1112 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) { 1113 bufs[nb_pkts - 1 - num_not_send - j] = 1114 slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j]; 1115 } 1116 1117 num_tx_total += num_send; 1118 } 1119 1120 return num_tx_total; 1121 } 1122 1123 static inline uint16_t 1124 tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs, 1125 uint16_t *slave_port_ids, uint16_t slave_count) 1126 { 1127 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 1128 struct bond_dev_private *internals = bd_tx_q->dev_private; 1129 1130 /* Array to sort mbufs for transmission on each slave into */ 1131 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; 1132 /* Number of mbufs for transmission on each slave */ 1133 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; 1134 /* Mapping array generated by hash function to map mbufs to slaves */ 1135 uint16_t bufs_slave_port_idxs[nb_bufs]; 1136 1137 uint16_t slave_tx_count; 1138 uint16_t total_tx_count = 0, total_tx_fail_count = 0; 1139 1140 uint16_t i; 1141 1142 /* 1143 * Populate slaves mbuf with the packets which are to be sent on it 1144 * selecting output slave using hash based on xmit policy 1145 */ 1146 internals->burst_xmit_hash(bufs, nb_bufs, slave_count, 1147 bufs_slave_port_idxs); 1148 1149 for (i = 0; i < nb_bufs; i++) { 1150 /* Populate slave mbuf arrays with mbufs for that slave. */ 1151 uint16_t slave_idx = bufs_slave_port_idxs[i]; 1152 1153 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; 1154 } 1155 1156 /* Send packet burst on each slave device */ 1157 for (i = 0; i < slave_count; i++) { 1158 if (slave_nb_bufs[i] == 0) 1159 continue; 1160 1161 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], 1162 bd_tx_q->queue_id, slave_bufs[i], 1163 slave_nb_bufs[i]); 1164 1165 total_tx_count += slave_tx_count; 1166 1167 /* If tx burst fails move packets to end of bufs */ 1168 if (unlikely(slave_tx_count < slave_nb_bufs[i])) { 1169 int slave_tx_fail_count = slave_nb_bufs[i] - 1170 slave_tx_count; 1171 total_tx_fail_count += slave_tx_fail_count; 1172 memcpy(&bufs[nb_bufs - total_tx_fail_count], 1173 &slave_bufs[i][slave_tx_count], 1174 slave_tx_fail_count * sizeof(bufs[0])); 1175 } 1176 } 1177 1178 return total_tx_count; 1179 } 1180 1181 static uint16_t 1182 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, 1183 uint16_t nb_bufs) 1184 { 1185 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 1186 struct bond_dev_private *internals = bd_tx_q->dev_private; 1187 1188 uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; 1189 uint16_t slave_count; 1190 1191 if (unlikely(nb_bufs == 0)) 1192 return 0; 1193 1194 /* Copy slave list to protect against slave up/down changes during tx 1195 * bursting 1196 */ 1197 slave_count = internals->active_slave_count; 1198 if (unlikely(slave_count < 1)) 1199 return 0; 1200 1201 memcpy(slave_port_ids, internals->active_slaves, 1202 sizeof(slave_port_ids[0]) * slave_count); 1203 return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids, 1204 slave_count); 1205 } 1206 1207 static inline uint16_t 1208 tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs, 1209 bool dedicated_txq) 1210 { 1211 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 1212 struct 
bond_dev_private *internals = bd_tx_q->dev_private; 1213 1214 uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; 1215 uint16_t slave_count; 1216 1217 uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS]; 1218 uint16_t dist_slave_count; 1219 1220 uint16_t slave_tx_count; 1221 1222 uint16_t i; 1223 1224 /* Copy slave list to protect against slave up/down changes during tx 1225 * bursting */ 1226 slave_count = internals->active_slave_count; 1227 if (unlikely(slave_count < 1)) 1228 return 0; 1229 1230 memcpy(slave_port_ids, internals->active_slaves, 1231 sizeof(slave_port_ids[0]) * slave_count); 1232 1233 if (dedicated_txq) 1234 goto skip_tx_ring; 1235 1236 /* Check for LACP control packets and send if available */ 1237 for (i = 0; i < slave_count; i++) { 1238 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]]; 1239 struct rte_mbuf *ctrl_pkt = NULL; 1240 1241 if (likely(rte_ring_empty(port->tx_ring))) 1242 continue; 1243 1244 if (rte_ring_dequeue(port->tx_ring, 1245 (void **)&ctrl_pkt) != -ENOENT) { 1246 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], 1247 bd_tx_q->queue_id, &ctrl_pkt, 1); 1248 /* 1249 * re-enqueue LAG control plane packets to buffering 1250 * ring if transmission fails so the packet isn't lost. 1251 */ 1252 if (slave_tx_count != 1) 1253 rte_ring_enqueue(port->tx_ring, ctrl_pkt); 1254 } 1255 } 1256 1257 skip_tx_ring: 1258 if (unlikely(nb_bufs == 0)) 1259 return 0; 1260 1261 dist_slave_count = 0; 1262 for (i = 0; i < slave_count; i++) { 1263 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]]; 1264 1265 if (ACTOR_STATE(port, DISTRIBUTING)) 1266 dist_slave_port_ids[dist_slave_count++] = 1267 slave_port_ids[i]; 1268 } 1269 1270 if (unlikely(dist_slave_count < 1)) 1271 return 0; 1272 1273 return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids, 1274 dist_slave_count); 1275 } 1276 1277 static uint16_t 1278 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, 1279 uint16_t nb_bufs) 1280 { 1281 return tx_burst_8023ad(queue, bufs, nb_bufs, false); 1282 } 1283 1284 static uint16_t 1285 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, 1286 uint16_t nb_bufs) 1287 { 1288 return tx_burst_8023ad(queue, bufs, nb_bufs, true); 1289 } 1290 1291 static uint16_t 1292 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs, 1293 uint16_t nb_pkts) 1294 { 1295 struct bond_dev_private *internals; 1296 struct bond_tx_queue *bd_tx_q; 1297 1298 uint16_t slaves[RTE_MAX_ETHPORTS]; 1299 uint8_t tx_failed_flag = 0; 1300 uint16_t num_of_slaves; 1301 1302 uint16_t max_nb_of_tx_pkts = 0; 1303 1304 int slave_tx_total[RTE_MAX_ETHPORTS]; 1305 int i, most_successful_tx_slave = -1; 1306 1307 bd_tx_q = (struct bond_tx_queue *)queue; 1308 internals = bd_tx_q->dev_private; 1309 1310 /* Copy slave list to protect against slave up/down changes during tx 1311 * bursting */ 1312 num_of_slaves = internals->active_slave_count; 1313 memcpy(slaves, internals->active_slaves, 1314 sizeof(internals->active_slaves[0]) * num_of_slaves); 1315 1316 if (num_of_slaves < 1) 1317 return 0; 1318 1319 /* Increment reference count on mbufs */ 1320 for (i = 0; i < nb_pkts; i++) 1321 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1); 1322 1323 /* Transmit burst on each active slave */ 1324 for (i = 0; i < num_of_slaves; i++) { 1325 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, 1326 bufs, nb_pkts); 1327 1328 if (unlikely(slave_tx_total[i] < nb_pkts)) 1329 tx_failed_flag = 1; 1330 1331 /* record the value and slave index for the slave which 
transmits the 1332 * maximum number of packets */ 1333 if (slave_tx_total[i] > max_nb_of_tx_pkts) { 1334 max_nb_of_tx_pkts = slave_tx_total[i]; 1335 most_successful_tx_slave = i; 1336 } 1337 } 1338 1339 /* if slaves fail to transmit packets from burst, the calling application 1340 * is not expected to know about multiple references to packets so we must 1341 * handle failures of all packets except those of the most successful slave 1342 */ 1343 if (unlikely(tx_failed_flag)) 1344 for (i = 0; i < num_of_slaves; i++) 1345 if (i != most_successful_tx_slave) 1346 while (slave_tx_total[i] < nb_pkts) 1347 rte_pktmbuf_free(bufs[slave_tx_total[i]++]); 1348 1349 return max_nb_of_tx_pkts; 1350 } 1351 1352 static void 1353 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link) 1354 { 1355 struct bond_dev_private *bond_ctx = ethdev->data->dev_private; 1356 1357 if (bond_ctx->mode == BONDING_MODE_8023AD) { 1358 /** 1359 * If in mode 4 then save the link properties of the first 1360 * slave, all subsequent slaves must match these properties 1361 */ 1362 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link; 1363 1364 bond_link->link_autoneg = slave_link->link_autoneg; 1365 bond_link->link_duplex = slave_link->link_duplex; 1366 bond_link->link_speed = slave_link->link_speed; 1367 } else { 1368 /** 1369 * In any other mode the link properties are set to default 1370 * values of AUTONEG/DUPLEX 1371 */ 1372 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG; 1373 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX; 1374 } 1375 } 1376 1377 static int 1378 link_properties_valid(struct rte_eth_dev *ethdev, 1379 struct rte_eth_link *slave_link) 1380 { 1381 struct bond_dev_private *bond_ctx = ethdev->data->dev_private; 1382 1383 if (bond_ctx->mode == BONDING_MODE_8023AD) { 1384 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link; 1385 1386 if (bond_link->link_duplex != slave_link->link_duplex || 1387 bond_link->link_autoneg != slave_link->link_autoneg || 1388 bond_link->link_speed != slave_link->link_speed) 1389 return -1; 1390 } 1391 1392 return 0; 1393 } 1394 1395 int 1396 mac_address_get(struct rte_eth_dev *eth_dev, 1397 struct rte_ether_addr *dst_mac_addr) 1398 { 1399 struct rte_ether_addr *mac_addr; 1400 1401 if (eth_dev == NULL) { 1402 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); 1403 return -1; 1404 } 1405 1406 if (dst_mac_addr == NULL) { 1407 RTE_BOND_LOG(ERR, "NULL pointer MAC specified"); 1408 return -1; 1409 } 1410 1411 mac_addr = eth_dev->data->mac_addrs; 1412 1413 rte_ether_addr_copy(mac_addr, dst_mac_addr); 1414 return 0; 1415 } 1416 1417 int 1418 mac_address_set(struct rte_eth_dev *eth_dev, 1419 struct rte_ether_addr *new_mac_addr) 1420 { 1421 struct rte_ether_addr *mac_addr; 1422 1423 if (eth_dev == NULL) { 1424 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); 1425 return -1; 1426 } 1427 1428 if (new_mac_addr == NULL) { 1429 RTE_BOND_LOG(ERR, "NULL pointer MAC specified"); 1430 return -1; 1431 } 1432 1433 mac_addr = eth_dev->data->mac_addrs; 1434 1435 /* If new MAC is different to current MAC then update */ 1436 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0) 1437 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr)); 1438 1439 return 0; 1440 } 1441 1442 static const struct rte_ether_addr null_mac_addr; 1443 1444 /* 1445 * Add additional MAC addresses to the slave 1446 */ 1447 int 1448 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev, 1449 uint16_t slave_port_id) 1450 { 1451 int i, ret; 1452 struct rte_ether_addr 
*mac_addr; 1453 1454 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) { 1455 mac_addr = &bonded_eth_dev->data->mac_addrs[i]; 1456 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr)) 1457 break; 1458 1459 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0); 1460 if (ret < 0) { 1461 /* rollback */ 1462 for (i--; i > 0; i--) 1463 rte_eth_dev_mac_addr_remove(slave_port_id, 1464 &bonded_eth_dev->data->mac_addrs[i]); 1465 return ret; 1466 } 1467 } 1468 1469 return 0; 1470 } 1471 1472 /* 1473 * Remove additional MAC addresses from the slave 1474 */ 1475 int 1476 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev, 1477 uint16_t slave_port_id) 1478 { 1479 int i, rc, ret; 1480 struct rte_ether_addr *mac_addr; 1481 1482 rc = 0; 1483 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) { 1484 mac_addr = &bonded_eth_dev->data->mac_addrs[i]; 1485 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr)) 1486 break; 1487 1488 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr); 1489 /* save only the first error */ 1490 if (ret < 0 && rc == 0) 1491 rc = ret; 1492 } 1493 1494 return rc; 1495 } 1496 1497 int 1498 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) 1499 { 1500 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; 1501 bool set; 1502 int i; 1503 1504 /* Update slave devices MAC addresses */ 1505 if (internals->slave_count < 1) 1506 return -1; 1507 1508 switch (internals->mode) { 1509 case BONDING_MODE_ROUND_ROBIN: 1510 case BONDING_MODE_BALANCE: 1511 case BONDING_MODE_BROADCAST: 1512 for (i = 0; i < internals->slave_count; i++) { 1513 if (rte_eth_dev_default_mac_addr_set( 1514 internals->slaves[i].port_id, 1515 bonded_eth_dev->data->mac_addrs)) { 1516 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", 1517 internals->slaves[i].port_id); 1518 return -1; 1519 } 1520 } 1521 break; 1522 case BONDING_MODE_8023AD: 1523 bond_mode_8023ad_mac_address_update(bonded_eth_dev); 1524 break; 1525 case BONDING_MODE_ACTIVE_BACKUP: 1526 case BONDING_MODE_TLB: 1527 case BONDING_MODE_ALB: 1528 default: 1529 set = true; 1530 for (i = 0; i < internals->slave_count; i++) { 1531 if (internals->slaves[i].port_id == 1532 internals->current_primary_port) { 1533 if (rte_eth_dev_default_mac_addr_set( 1534 internals->current_primary_port, 1535 bonded_eth_dev->data->mac_addrs)) { 1536 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", 1537 internals->current_primary_port); 1538 set = false; 1539 } 1540 } else { 1541 if (rte_eth_dev_default_mac_addr_set( 1542 internals->slaves[i].port_id, 1543 &internals->slaves[i].persisted_mac_addr)) { 1544 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", 1545 internals->slaves[i].port_id); 1546 } 1547 } 1548 } 1549 if (!set) 1550 return -1; 1551 } 1552 1553 return 0; 1554 } 1555 1556 int 1557 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode) 1558 { 1559 struct bond_dev_private *internals; 1560 1561 internals = eth_dev->data->dev_private; 1562 1563 switch (mode) { 1564 case BONDING_MODE_ROUND_ROBIN: 1565 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin; 1566 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; 1567 break; 1568 case BONDING_MODE_ACTIVE_BACKUP: 1569 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup; 1570 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup; 1571 break; 1572 case BONDING_MODE_BALANCE: 1573 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance; 1574 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; 1575 break; 1576 case BONDING_MODE_BROADCAST: 1577 eth_dev->tx_pkt_burst = 
bond_ethdev_tx_burst_broadcast; 1578 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; 1579 break; 1580 case BONDING_MODE_8023AD: 1581 if (bond_mode_8023ad_enable(eth_dev) != 0) 1582 return -1; 1583 1584 if (internals->mode4.dedicated_queues.enabled == 0) { 1585 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad; 1586 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad; 1587 RTE_BOND_LOG(WARNING, 1588 "Using mode 4, it is necessary to do TX burst " 1589 "and RX burst at least every 100ms."); 1590 } else { 1591 /* Use flow director's optimization */ 1592 eth_dev->rx_pkt_burst = 1593 bond_ethdev_rx_burst_8023ad_fast_queue; 1594 eth_dev->tx_pkt_burst = 1595 bond_ethdev_tx_burst_8023ad_fast_queue; 1596 } 1597 break; 1598 case BONDING_MODE_TLB: 1599 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb; 1600 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup; 1601 break; 1602 case BONDING_MODE_ALB: 1603 if (bond_mode_alb_enable(eth_dev) != 0) 1604 return -1; 1605 1606 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb; 1607 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb; 1608 break; 1609 default: 1610 return -1; 1611 } 1612 1613 internals->mode = mode; 1614 1615 return 0; 1616 } 1617 1618 1619 static int 1620 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev, 1621 struct rte_eth_dev *slave_eth_dev) 1622 { 1623 int errval = 0; 1624 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; 1625 struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id]; 1626 1627 if (port->slow_pool == NULL) { 1628 char mem_name[256]; 1629 int slave_id = slave_eth_dev->data->port_id; 1630 1631 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool", 1632 slave_id); 1633 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191, 1634 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE, 1635 slave_eth_dev->data->numa_node); 1636 1637 /* Any memory allocation failure in initialization is critical because 1638 * resources can't be free, so reinitialization is impossible. 
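	 * Editor's note: for that reason a pool allocation failure below is treated as
	 * fatal and ends in rte_panic() rather than an error return.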
*/ 1639 if (port->slow_pool == NULL) { 1640 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n", 1641 slave_id, mem_name, rte_strerror(rte_errno)); 1642 } 1643 } 1644 1645 if (internals->mode4.dedicated_queues.enabled == 1) { 1646 /* Configure slow Rx queue */ 1647 1648 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, 1649 internals->mode4.dedicated_queues.rx_qid, 128, 1650 rte_eth_dev_socket_id(slave_eth_dev->data->port_id), 1651 NULL, port->slow_pool); 1652 if (errval != 0) { 1653 RTE_BOND_LOG(ERR, 1654 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)", 1655 slave_eth_dev->data->port_id, 1656 internals->mode4.dedicated_queues.rx_qid, 1657 errval); 1658 return errval; 1659 } 1660 1661 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, 1662 internals->mode4.dedicated_queues.tx_qid, 512, 1663 rte_eth_dev_socket_id(slave_eth_dev->data->port_id), 1664 NULL); 1665 if (errval != 0) { 1666 RTE_BOND_LOG(ERR, 1667 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", 1668 slave_eth_dev->data->port_id, 1669 internals->mode4.dedicated_queues.tx_qid, 1670 errval); 1671 return errval; 1672 } 1673 } 1674 return 0; 1675 } 1676 1677 int 1678 slave_configure(struct rte_eth_dev *bonded_eth_dev, 1679 struct rte_eth_dev *slave_eth_dev) 1680 { 1681 struct bond_rx_queue *bd_rx_q; 1682 struct bond_tx_queue *bd_tx_q; 1683 uint16_t nb_rx_queues; 1684 uint16_t nb_tx_queues; 1685 1686 int errval; 1687 uint16_t q_id; 1688 struct rte_flow_error flow_error; 1689 1690 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; 1691 1692 /* Stop slave */ 1693 errval = rte_eth_dev_stop(slave_eth_dev->data->port_id); 1694 if (errval != 0) 1695 RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)", 1696 slave_eth_dev->data->port_id, errval); 1697 1698 /* Enable interrupts on slave device if supported */ 1699 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) 1700 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1; 1701 1702 /* If RSS is enabled for bonding, try to enable it for slaves */ 1703 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) { 1704 if (internals->rss_key_len != 0) { 1705 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 1706 internals->rss_key_len; 1707 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = 1708 internals->rss_key; 1709 } else { 1710 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL; 1711 } 1712 1713 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = 1714 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 1715 slave_eth_dev->data->dev_conf.rxmode.mq_mode = 1716 bonded_eth_dev->data->dev_conf.rxmode.mq_mode; 1717 } 1718 1719 if (bonded_eth_dev->data->dev_conf.rxmode.offloads & 1720 DEV_RX_OFFLOAD_VLAN_FILTER) 1721 slave_eth_dev->data->dev_conf.rxmode.offloads |= 1722 DEV_RX_OFFLOAD_VLAN_FILTER; 1723 else 1724 slave_eth_dev->data->dev_conf.rxmode.offloads &= 1725 ~DEV_RX_OFFLOAD_VLAN_FILTER; 1726 1727 slave_eth_dev->data->dev_conf.rxmode.max_rx_pkt_len = 1728 bonded_eth_dev->data->dev_conf.rxmode.max_rx_pkt_len; 1729 1730 if (bonded_eth_dev->data->dev_conf.rxmode.offloads & 1731 DEV_RX_OFFLOAD_JUMBO_FRAME) 1732 slave_eth_dev->data->dev_conf.rxmode.offloads |= 1733 DEV_RX_OFFLOAD_JUMBO_FRAME; 1734 else 1735 slave_eth_dev->data->dev_conf.rxmode.offloads &= 1736 ~DEV_RX_OFFLOAD_JUMBO_FRAME; 1737 1738 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues; 1739 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues; 1740 1741 if (internals->mode == BONDING_MODE_8023AD) 
{ 1742 if (internals->mode4.dedicated_queues.enabled == 1) { 1743 nb_rx_queues++; 1744 nb_tx_queues++; 1745 } 1746 } 1747 1748 errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id, 1749 bonded_eth_dev->data->mtu); 1750 if (errval != 0 && errval != -ENOTSUP) { 1751 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)", 1752 slave_eth_dev->data->port_id, errval); 1753 return errval; 1754 } 1755 1756 /* Configure device */ 1757 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id, 1758 nb_rx_queues, nb_tx_queues, 1759 &(slave_eth_dev->data->dev_conf)); 1760 if (errval != 0) { 1761 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)", 1762 slave_eth_dev->data->port_id, errval); 1763 return errval; 1764 } 1765 1766 /* Setup Rx Queues */ 1767 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) { 1768 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id]; 1769 1770 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, 1771 bd_rx_q->nb_rx_desc, 1772 rte_eth_dev_socket_id(slave_eth_dev->data->port_id), 1773 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool); 1774 if (errval != 0) { 1775 RTE_BOND_LOG(ERR, 1776 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)", 1777 slave_eth_dev->data->port_id, q_id, errval); 1778 return errval; 1779 } 1780 } 1781 1782 /* Setup Tx Queues */ 1783 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) { 1784 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id]; 1785 1786 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, 1787 bd_tx_q->nb_tx_desc, 1788 rte_eth_dev_socket_id(slave_eth_dev->data->port_id), 1789 &bd_tx_q->tx_conf); 1790 if (errval != 0) { 1791 RTE_BOND_LOG(ERR, 1792 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", 1793 slave_eth_dev->data->port_id, q_id, errval); 1794 return errval; 1795 } 1796 } 1797 1798 if (internals->mode == BONDING_MODE_8023AD && 1799 internals->mode4.dedicated_queues.enabled == 1) { 1800 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev) 1801 != 0) 1802 return errval; 1803 1804 errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev, 1805 slave_eth_dev->data->port_id); 1806 if (errval != 0) { 1807 RTE_BOND_LOG(ERR, 1808 "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)", 1809 slave_eth_dev->data->port_id, errval); 1810 return errval; 1811 } 1812 1813 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL) 1814 rte_flow_destroy(slave_eth_dev->data->port_id, 1815 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id], 1816 &flow_error); 1817 1818 errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev, 1819 slave_eth_dev->data->port_id); 1820 if (errval != 0) { 1821 RTE_BOND_LOG(ERR, 1822 "bond_ethdev_8023ad_flow_set: port=%d, err (%d)", 1823 slave_eth_dev->data->port_id, errval); 1824 return errval; 1825 } 1826 } 1827 1828 /* Start device */ 1829 errval = rte_eth_dev_start(slave_eth_dev->data->port_id); 1830 if (errval != 0) { 1831 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)", 1832 slave_eth_dev->data->port_id, errval); 1833 return -1; 1834 } 1835 1836 /* If RSS is enabled for bonding, synchronize RETA */ 1837 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) { 1838 int i; 1839 struct bond_dev_private *internals; 1840 1841 internals = bonded_eth_dev->data->dev_private; 1842 1843 for (i = 0; i < internals->slave_count; i++) { 1844 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) { 1845 errval = 
rte_eth_dev_rss_reta_update( 1846 slave_eth_dev->data->port_id, 1847 &internals->reta_conf[0], 1848 internals->slaves[i].reta_size); 1849 if (errval != 0) { 1850 RTE_BOND_LOG(WARNING, 1851 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)." 1852 " RSS Configuration for bonding may be inconsistent.", 1853 slave_eth_dev->data->port_id, errval); 1854 } 1855 break; 1856 } 1857 } 1858 } 1859 1860 /* If lsc interrupt is set, check initial slave's link status */ 1861 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) { 1862 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0); 1863 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id, 1864 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id, 1865 NULL); 1866 } 1867 1868 return 0; 1869 } 1870 1871 void 1872 slave_remove(struct bond_dev_private *internals, 1873 struct rte_eth_dev *slave_eth_dev) 1874 { 1875 uint16_t i; 1876 1877 for (i = 0; i < internals->slave_count; i++) 1878 if (internals->slaves[i].port_id == 1879 slave_eth_dev->data->port_id) 1880 break; 1881 1882 if (i < (internals->slave_count - 1)) { 1883 struct rte_flow *flow; 1884 1885 memmove(&internals->slaves[i], &internals->slaves[i + 1], 1886 sizeof(internals->slaves[0]) * 1887 (internals->slave_count - i - 1)); 1888 TAILQ_FOREACH(flow, &internals->flow_list, next) { 1889 memmove(&flow->flows[i], &flow->flows[i + 1], 1890 sizeof(flow->flows[0]) * 1891 (internals->slave_count - i - 1)); 1892 flow->flows[internals->slave_count - 1] = NULL; 1893 } 1894 } 1895 1896 internals->slave_count--; 1897 1898 /* force reconfiguration of slave interfaces */ 1899 rte_eth_dev_internal_reset(slave_eth_dev); 1900 } 1901 1902 static void 1903 bond_ethdev_slave_link_status_change_monitor(void *cb_arg); 1904 1905 void 1906 slave_add(struct bond_dev_private *internals, 1907 struct rte_eth_dev *slave_eth_dev) 1908 { 1909 struct bond_slave_details *slave_details = 1910 &internals->slaves[internals->slave_count]; 1911 1912 slave_details->port_id = slave_eth_dev->data->port_id; 1913 slave_details->last_link_status = 0; 1914 1915 /* Mark slave devices that don't support interrupts so we can 1916 * compensate when we start the bond 1917 */ 1918 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) { 1919 slave_details->link_status_poll_enabled = 1; 1920 } 1921 1922 slave_details->link_status_wait_to_complete = 0; 1923 /* clean tlb_last_obytes when adding port for bonding device */ 1924 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs, 1925 sizeof(struct rte_ether_addr)); 1926 } 1927 1928 void 1929 bond_ethdev_primary_set(struct bond_dev_private *internals, 1930 uint16_t slave_port_id) 1931 { 1932 int i; 1933 1934 if (internals->active_slave_count < 1) 1935 internals->current_primary_port = slave_port_id; 1936 else 1937 /* Search bonded device slave ports for new proposed primary port */ 1938 for (i = 0; i < internals->active_slave_count; i++) { 1939 if (internals->active_slaves[i] == slave_port_id) 1940 internals->current_primary_port = slave_port_id; 1941 } 1942 } 1943 1944 static int 1945 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev); 1946 1947 static int 1948 bond_ethdev_start(struct rte_eth_dev *eth_dev) 1949 { 1950 struct bond_dev_private *internals; 1951 int i; 1952 1953 /* slave eth dev will be started by bonded device */ 1954 if (check_for_bonded_ethdev(eth_dev)) { 1955 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)", 1956 eth_dev->data->port_id); 1957 return -1; 1958 } 1959 1960 
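	/*
	 * Editor's note: the bonded port is brought up with link down; link state is
	 * reported up later, once an active slave signals link up through the LSC
	 * callback or the link status polling alarm.
	 */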
eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; 1961 eth_dev->data->dev_started = 1; 1962 1963 internals = eth_dev->data->dev_private; 1964 1965 if (internals->slave_count == 0) { 1966 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices"); 1967 goto out_err; 1968 } 1969 1970 if (internals->user_defined_mac == 0) { 1971 struct rte_ether_addr *new_mac_addr = NULL; 1972 1973 for (i = 0; i < internals->slave_count; i++) 1974 if (internals->slaves[i].port_id == internals->primary_port) 1975 new_mac_addr = &internals->slaves[i].persisted_mac_addr; 1976 1977 if (new_mac_addr == NULL) 1978 goto out_err; 1979 1980 if (mac_address_set(eth_dev, new_mac_addr) != 0) { 1981 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address", 1982 eth_dev->data->port_id); 1983 goto out_err; 1984 } 1985 } 1986 1987 if (internals->mode == BONDING_MODE_8023AD) { 1988 if (internals->mode4.dedicated_queues.enabled == 1) { 1989 internals->mode4.dedicated_queues.rx_qid = 1990 eth_dev->data->nb_rx_queues; 1991 internals->mode4.dedicated_queues.tx_qid = 1992 eth_dev->data->nb_tx_queues; 1993 } 1994 } 1995 1996 1997 /* Reconfigure each slave device if starting bonded device */ 1998 for (i = 0; i < internals->slave_count; i++) { 1999 struct rte_eth_dev *slave_ethdev = 2000 &(rte_eth_devices[internals->slaves[i].port_id]); 2001 if (slave_configure(eth_dev, slave_ethdev) != 0) { 2002 RTE_BOND_LOG(ERR, 2003 "bonded port (%d) failed to reconfigure slave device (%d)", 2004 eth_dev->data->port_id, 2005 internals->slaves[i].port_id); 2006 goto out_err; 2007 } 2008 /* We will need to poll for link status if any slave doesn't 2009 * support interrupts 2010 */ 2011 if (internals->slaves[i].link_status_poll_enabled) 2012 internals->link_status_polling_enabled = 1; 2013 } 2014 2015 /* start polling if needed */ 2016 if (internals->link_status_polling_enabled) { 2017 rte_eal_alarm_set( 2018 internals->link_status_polling_interval_ms * 1000, 2019 bond_ethdev_slave_link_status_change_monitor, 2020 (void *)&rte_eth_devices[internals->port_id]); 2021 } 2022 2023 /* Update all slave devices MACs*/ 2024 if (mac_address_slaves_update(eth_dev) != 0) 2025 goto out_err; 2026 2027 if (internals->user_defined_primary_port) 2028 bond_ethdev_primary_set(internals, internals->primary_port); 2029 2030 if (internals->mode == BONDING_MODE_8023AD) 2031 bond_mode_8023ad_start(eth_dev); 2032 2033 if (internals->mode == BONDING_MODE_TLB || 2034 internals->mode == BONDING_MODE_ALB) 2035 bond_tlb_enable(internals); 2036 2037 return 0; 2038 2039 out_err: 2040 eth_dev->data->dev_started = 0; 2041 return -1; 2042 } 2043 2044 static void 2045 bond_ethdev_free_queues(struct rte_eth_dev *dev) 2046 { 2047 uint16_t i; 2048 2049 if (dev->data->rx_queues != NULL) { 2050 for (i = 0; i < dev->data->nb_rx_queues; i++) { 2051 rte_free(dev->data->rx_queues[i]); 2052 dev->data->rx_queues[i] = NULL; 2053 } 2054 dev->data->nb_rx_queues = 0; 2055 } 2056 2057 if (dev->data->tx_queues != NULL) { 2058 for (i = 0; i < dev->data->nb_tx_queues; i++) { 2059 rte_free(dev->data->tx_queues[i]); 2060 dev->data->tx_queues[i] = NULL; 2061 } 2062 dev->data->nb_tx_queues = 0; 2063 } 2064 } 2065 2066 int 2067 bond_ethdev_stop(struct rte_eth_dev *eth_dev) 2068 { 2069 struct bond_dev_private *internals = eth_dev->data->dev_private; 2070 uint16_t i; 2071 int ret; 2072 2073 if (internals->mode == BONDING_MODE_8023AD) { 2074 struct port *port; 2075 void *pkt = NULL; 2076 2077 bond_mode_8023ad_stop(eth_dev); 2078 2079 /* Discard all messages to/from mode 4 state machines 
*/ 2080 for (i = 0; i < internals->active_slave_count; i++) { 2081 port = &bond_mode_8023ad_ports[internals->active_slaves[i]]; 2082 2083 RTE_ASSERT(port->rx_ring != NULL); 2084 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT) 2085 rte_pktmbuf_free(pkt); 2086 2087 RTE_ASSERT(port->tx_ring != NULL); 2088 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT) 2089 rte_pktmbuf_free(pkt); 2090 } 2091 } 2092 2093 if (internals->mode == BONDING_MODE_TLB || 2094 internals->mode == BONDING_MODE_ALB) { 2095 bond_tlb_disable(internals); 2096 for (i = 0; i < internals->active_slave_count; i++) 2097 tlb_last_obytets[internals->active_slaves[i]] = 0; 2098 } 2099 2100 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; 2101 eth_dev->data->dev_started = 0; 2102 2103 internals->link_status_polling_enabled = 0; 2104 for (i = 0; i < internals->slave_count; i++) { 2105 uint16_t slave_id = internals->slaves[i].port_id; 2106 if (find_slave_by_id(internals->active_slaves, 2107 internals->active_slave_count, slave_id) != 2108 internals->active_slave_count) { 2109 internals->slaves[i].last_link_status = 0; 2110 ret = rte_eth_dev_stop(slave_id); 2111 if (ret != 0) { 2112 RTE_BOND_LOG(ERR, "Failed to stop device on port %u", 2113 slave_id); 2114 return ret; 2115 } 2116 deactivate_slave(eth_dev, slave_id); 2117 } 2118 } 2119 2120 return 0; 2121 } 2122 2123 int 2124 bond_ethdev_close(struct rte_eth_dev *dev) 2125 { 2126 struct bond_dev_private *internals = dev->data->dev_private; 2127 uint16_t bond_port_id = internals->port_id; 2128 int skipped = 0; 2129 struct rte_flow_error ferror; 2130 2131 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2132 return 0; 2133 2134 RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name); 2135 while (internals->slave_count != skipped) { 2136 uint16_t port_id = internals->slaves[skipped].port_id; 2137 2138 if (rte_eth_dev_stop(port_id) != 0) { 2139 RTE_BOND_LOG(ERR, "Failed to stop device on port %u", 2140 port_id); 2141 skipped++; 2142 } 2143 2144 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) { 2145 RTE_BOND_LOG(ERR, 2146 "Failed to remove port %d from bonded device %s", 2147 port_id, dev->device->name); 2148 skipped++; 2149 } 2150 } 2151 bond_flow_ops.flush(dev, &ferror); 2152 bond_ethdev_free_queues(dev); 2153 rte_bitmap_reset(internals->vlan_filter_bmp); 2154 rte_bitmap_free(internals->vlan_filter_bmp); 2155 rte_free(internals->vlan_filter_bmpmem); 2156 2157 /* Try to release mempool used in mode6. If the bond 2158 * device is not mode6, free the NULL is not problem. 2159 */ 2160 rte_mempool_free(internals->mode6.mempool); 2161 2162 return 0; 2163 } 2164 2165 /* forward declaration */ 2166 static int bond_ethdev_configure(struct rte_eth_dev *dev); 2167 2168 static int 2169 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) 2170 { 2171 struct bond_dev_private *internals = dev->data->dev_private; 2172 struct bond_slave_details slave; 2173 int ret; 2174 2175 uint16_t max_nb_rx_queues = UINT16_MAX; 2176 uint16_t max_nb_tx_queues = UINT16_MAX; 2177 uint16_t max_rx_desc_lim = UINT16_MAX; 2178 uint16_t max_tx_desc_lim = UINT16_MAX; 2179 2180 dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS; 2181 2182 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ? 
2183 internals->candidate_max_rx_pktlen : 2184 RTE_ETHER_MAX_JUMBO_FRAME_LEN; 2185 2186 /* Max number of tx/rx queues that the bonded device can support is the 2187 * minimum values of the bonded slaves, as all slaves must be capable 2188 * of supporting the same number of tx/rx queues. 2189 */ 2190 if (internals->slave_count > 0) { 2191 struct rte_eth_dev_info slave_info; 2192 uint16_t idx; 2193 2194 for (idx = 0; idx < internals->slave_count; idx++) { 2195 slave = internals->slaves[idx]; 2196 ret = rte_eth_dev_info_get(slave.port_id, &slave_info); 2197 if (ret != 0) { 2198 RTE_BOND_LOG(ERR, 2199 "%s: Error during getting device (port %u) info: %s\n", 2200 __func__, 2201 slave.port_id, 2202 strerror(-ret)); 2203 2204 return ret; 2205 } 2206 2207 if (slave_info.max_rx_queues < max_nb_rx_queues) 2208 max_nb_rx_queues = slave_info.max_rx_queues; 2209 2210 if (slave_info.max_tx_queues < max_nb_tx_queues) 2211 max_nb_tx_queues = slave_info.max_tx_queues; 2212 2213 if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim) 2214 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max; 2215 2216 if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim) 2217 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max; 2218 } 2219 } 2220 2221 dev_info->max_rx_queues = max_nb_rx_queues; 2222 dev_info->max_tx_queues = max_nb_tx_queues; 2223 2224 memcpy(&dev_info->default_rxconf, &internals->default_rxconf, 2225 sizeof(dev_info->default_rxconf)); 2226 memcpy(&dev_info->default_txconf, &internals->default_txconf, 2227 sizeof(dev_info->default_txconf)); 2228 2229 dev_info->rx_desc_lim.nb_max = max_rx_desc_lim; 2230 dev_info->tx_desc_lim.nb_max = max_tx_desc_lim; 2231 2232 /** 2233 * If dedicated hw queues enabled for link bonding device in LACP mode 2234 * then we need to reduce the maximum number of data path queues by 1. 
2235 */ 2236 if (internals->mode == BONDING_MODE_8023AD && 2237 internals->mode4.dedicated_queues.enabled == 1) { 2238 dev_info->max_rx_queues--; 2239 dev_info->max_tx_queues--; 2240 } 2241 2242 dev_info->min_rx_bufsize = 0; 2243 2244 dev_info->rx_offload_capa = internals->rx_offload_capa; 2245 dev_info->tx_offload_capa = internals->tx_offload_capa; 2246 dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa; 2247 dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa; 2248 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads; 2249 2250 dev_info->reta_size = internals->reta_size; 2251 2252 return 0; 2253 } 2254 2255 static int 2256 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) 2257 { 2258 int res; 2259 uint16_t i; 2260 struct bond_dev_private *internals = dev->data->dev_private; 2261 2262 /* don't do this while a slave is being added */ 2263 rte_spinlock_lock(&internals->lock); 2264 2265 if (on) 2266 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id); 2267 else 2268 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id); 2269 2270 for (i = 0; i < internals->slave_count; i++) { 2271 uint16_t port_id = internals->slaves[i].port_id; 2272 2273 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on); 2274 if (res == ENOTSUP) 2275 RTE_BOND_LOG(WARNING, 2276 "Setting VLAN filter on slave port %u not supported.", 2277 port_id); 2278 } 2279 2280 rte_spinlock_unlock(&internals->lock); 2281 return 0; 2282 } 2283 2284 static int 2285 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, 2286 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused, 2287 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool) 2288 { 2289 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *) 2290 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue), 2291 0, dev->data->numa_node); 2292 if (bd_rx_q == NULL) 2293 return -1; 2294 2295 bd_rx_q->queue_id = rx_queue_id; 2296 bd_rx_q->dev_private = dev->data->dev_private; 2297 2298 bd_rx_q->nb_rx_desc = nb_rx_desc; 2299 2300 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf)); 2301 bd_rx_q->mb_pool = mb_pool; 2302 2303 dev->data->rx_queues[rx_queue_id] = bd_rx_q; 2304 2305 return 0; 2306 } 2307 2308 static int 2309 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, 2310 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused, 2311 const struct rte_eth_txconf *tx_conf) 2312 { 2313 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *) 2314 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue), 2315 0, dev->data->numa_node); 2316 2317 if (bd_tx_q == NULL) 2318 return -1; 2319 2320 bd_tx_q->queue_id = tx_queue_id; 2321 bd_tx_q->dev_private = dev->data->dev_private; 2322 2323 bd_tx_q->nb_tx_desc = nb_tx_desc; 2324 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf)); 2325 2326 dev->data->tx_queues[tx_queue_id] = bd_tx_q; 2327 2328 return 0; 2329 } 2330 2331 static void 2332 bond_ethdev_rx_queue_release(void *queue) 2333 { 2334 if (queue == NULL) 2335 return; 2336 2337 rte_free(queue); 2338 } 2339 2340 static void 2341 bond_ethdev_tx_queue_release(void *queue) 2342 { 2343 if (queue == NULL) 2344 return; 2345 2346 rte_free(queue); 2347 } 2348 2349 static void 2350 bond_ethdev_slave_link_status_change_monitor(void *cb_arg) 2351 { 2352 struct rte_eth_dev *bonded_ethdev, *slave_ethdev; 2353 struct bond_dev_private *internals; 2354 2355 /* Default value for polling slave found is true as we don't want to 2356 * disable the polling 
thread if we cannot get the lock */ 2357 int i, polling_slave_found = 1; 2358 2359 if (cb_arg == NULL) 2360 return; 2361 2362 bonded_ethdev = cb_arg; 2363 internals = bonded_ethdev->data->dev_private; 2364 2365 if (!bonded_ethdev->data->dev_started || 2366 !internals->link_status_polling_enabled) 2367 return; 2368 2369 /* If device is currently being configured then don't check slaves link 2370 * status, wait until next period */ 2371 if (rte_spinlock_trylock(&internals->lock)) { 2372 if (internals->slave_count > 0) 2373 polling_slave_found = 0; 2374 2375 for (i = 0; i < internals->slave_count; i++) { 2376 if (!internals->slaves[i].link_status_poll_enabled) 2377 continue; 2378 2379 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id]; 2380 polling_slave_found = 1; 2381 2382 /* Update slave link status */ 2383 (*slave_ethdev->dev_ops->link_update)(slave_ethdev, 2384 internals->slaves[i].link_status_wait_to_complete); 2385 2386 /* if link status has changed since last checked then call lsc 2387 * event callback */ 2388 if (slave_ethdev->data->dev_link.link_status != 2389 internals->slaves[i].last_link_status) { 2390 internals->slaves[i].last_link_status = 2391 slave_ethdev->data->dev_link.link_status; 2392 2393 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id, 2394 RTE_ETH_EVENT_INTR_LSC, 2395 &bonded_ethdev->data->port_id, 2396 NULL); 2397 } 2398 } 2399 rte_spinlock_unlock(&internals->lock); 2400 } 2401 2402 if (polling_slave_found) 2403 /* Set alarm to continue monitoring link status of slave ethdev's */ 2404 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000, 2405 bond_ethdev_slave_link_status_change_monitor, cb_arg); 2406 } 2407 2408 static int 2409 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete) 2410 { 2411 int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link); 2412 2413 struct bond_dev_private *bond_ctx; 2414 struct rte_eth_link slave_link; 2415 2416 bool one_link_update_succeeded; 2417 uint32_t idx; 2418 int ret; 2419 2420 bond_ctx = ethdev->data->dev_private; 2421 2422 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE; 2423 2424 if (ethdev->data->dev_started == 0 || 2425 bond_ctx->active_slave_count == 0) { 2426 ethdev->data->dev_link.link_status = ETH_LINK_DOWN; 2427 return 0; 2428 } 2429 2430 ethdev->data->dev_link.link_status = ETH_LINK_UP; 2431 2432 if (wait_to_complete) 2433 link_update = rte_eth_link_get; 2434 else 2435 link_update = rte_eth_link_get_nowait; 2436 2437 switch (bond_ctx->mode) { 2438 case BONDING_MODE_BROADCAST: 2439 /** 2440 * Setting link speed to UINT32_MAX to ensure we pick up the 2441 * value of the first active slave 2442 */ 2443 ethdev->data->dev_link.link_speed = UINT32_MAX; 2444 2445 /** 2446 * link speed is minimum value of all the slaves link speed as 2447 * packet loss will occur on this slave if transmission at rates 2448 * greater than this are attempted 2449 */ 2450 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) { 2451 ret = link_update(bond_ctx->active_slaves[idx], 2452 &slave_link); 2453 if (ret < 0) { 2454 ethdev->data->dev_link.link_speed = 2455 ETH_SPEED_NUM_NONE; 2456 RTE_BOND_LOG(ERR, 2457 "Slave (port %u) link get failed: %s", 2458 bond_ctx->active_slaves[idx], 2459 rte_strerror(-ret)); 2460 return 0; 2461 } 2462 2463 if (slave_link.link_speed < 2464 ethdev->data->dev_link.link_speed) 2465 ethdev->data->dev_link.link_speed = 2466 slave_link.link_speed; 2467 } 2468 break; 2469 case BONDING_MODE_ACTIVE_BACKUP: 2470 /* Current primary slave */ 2471 ret 
= link_update(bond_ctx->current_primary_port, &slave_link); 2472 if (ret < 0) { 2473 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s", 2474 bond_ctx->current_primary_port, 2475 rte_strerror(-ret)); 2476 return 0; 2477 } 2478 2479 ethdev->data->dev_link.link_speed = slave_link.link_speed; 2480 break; 2481 case BONDING_MODE_8023AD: 2482 ethdev->data->dev_link.link_autoneg = 2483 bond_ctx->mode4.slave_link.link_autoneg; 2484 ethdev->data->dev_link.link_duplex = 2485 bond_ctx->mode4.slave_link.link_duplex; 2486 /* fall through */ 2487 /* to update link speed */ 2488 case BONDING_MODE_ROUND_ROBIN: 2489 case BONDING_MODE_BALANCE: 2490 case BONDING_MODE_TLB: 2491 case BONDING_MODE_ALB: 2492 default: 2493 /** 2494 * In theses mode the maximum theoretical link speed is the sum 2495 * of all the slaves 2496 */ 2497 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE; 2498 one_link_update_succeeded = false; 2499 2500 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) { 2501 ret = link_update(bond_ctx->active_slaves[idx], 2502 &slave_link); 2503 if (ret < 0) { 2504 RTE_BOND_LOG(ERR, 2505 "Slave (port %u) link get failed: %s", 2506 bond_ctx->active_slaves[idx], 2507 rte_strerror(-ret)); 2508 continue; 2509 } 2510 2511 one_link_update_succeeded = true; 2512 ethdev->data->dev_link.link_speed += 2513 slave_link.link_speed; 2514 } 2515 2516 if (!one_link_update_succeeded) { 2517 RTE_BOND_LOG(ERR, "All slaves link get failed"); 2518 return 0; 2519 } 2520 } 2521 2522 2523 return 0; 2524 } 2525 2526 2527 static int 2528 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) 2529 { 2530 struct bond_dev_private *internals = dev->data->dev_private; 2531 struct rte_eth_stats slave_stats; 2532 int i, j; 2533 2534 for (i = 0; i < internals->slave_count; i++) { 2535 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats); 2536 2537 stats->ipackets += slave_stats.ipackets; 2538 stats->opackets += slave_stats.opackets; 2539 stats->ibytes += slave_stats.ibytes; 2540 stats->obytes += slave_stats.obytes; 2541 stats->imissed += slave_stats.imissed; 2542 stats->ierrors += slave_stats.ierrors; 2543 stats->oerrors += slave_stats.oerrors; 2544 stats->rx_nombuf += slave_stats.rx_nombuf; 2545 2546 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) { 2547 stats->q_ipackets[j] += slave_stats.q_ipackets[j]; 2548 stats->q_opackets[j] += slave_stats.q_opackets[j]; 2549 stats->q_ibytes[j] += slave_stats.q_ibytes[j]; 2550 stats->q_obytes[j] += slave_stats.q_obytes[j]; 2551 stats->q_errors[j] += slave_stats.q_errors[j]; 2552 } 2553 2554 } 2555 2556 return 0; 2557 } 2558 2559 static int 2560 bond_ethdev_stats_reset(struct rte_eth_dev *dev) 2561 { 2562 struct bond_dev_private *internals = dev->data->dev_private; 2563 int i; 2564 int err; 2565 int ret; 2566 2567 for (i = 0, err = 0; i < internals->slave_count; i++) { 2568 ret = rte_eth_stats_reset(internals->slaves[i].port_id); 2569 if (ret != 0) 2570 err = ret; 2571 } 2572 2573 return err; 2574 } 2575 2576 static int 2577 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev) 2578 { 2579 struct bond_dev_private *internals = eth_dev->data->dev_private; 2580 int i; 2581 int ret = 0; 2582 uint16_t port_id; 2583 2584 switch (internals->mode) { 2585 /* Promiscuous mode is propagated to all slaves */ 2586 case BONDING_MODE_ROUND_ROBIN: 2587 case BONDING_MODE_BALANCE: 2588 case BONDING_MODE_BROADCAST: 2589 case BONDING_MODE_8023AD: { 2590 unsigned int slave_ok = 0; 2591 2592 for (i = 0; i < internals->slave_count; i++) { 2593 port_id = 
internals->slaves[i].port_id; 2594 2595 ret = rte_eth_promiscuous_enable(port_id); 2596 if (ret != 0) 2597 RTE_BOND_LOG(ERR, 2598 "Failed to enable promiscuous mode for port %u: %s", 2599 port_id, rte_strerror(-ret)); 2600 else 2601 slave_ok++; 2602 } 2603 /* 2604 * Report success if operation is successful on at least 2605 * on one slave. Otherwise return last error code. 2606 */ 2607 if (slave_ok > 0) 2608 ret = 0; 2609 break; 2610 } 2611 /* Promiscuous mode is propagated only to primary slave */ 2612 case BONDING_MODE_ACTIVE_BACKUP: 2613 case BONDING_MODE_TLB: 2614 case BONDING_MODE_ALB: 2615 default: 2616 /* Do not touch promisc when there cannot be primary ports */ 2617 if (internals->slave_count == 0) 2618 break; 2619 port_id = internals->current_primary_port; 2620 ret = rte_eth_promiscuous_enable(port_id); 2621 if (ret != 0) 2622 RTE_BOND_LOG(ERR, 2623 "Failed to enable promiscuous mode for port %u: %s", 2624 port_id, rte_strerror(-ret)); 2625 } 2626 2627 return ret; 2628 } 2629 2630 static int 2631 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev) 2632 { 2633 struct bond_dev_private *internals = dev->data->dev_private; 2634 int i; 2635 int ret = 0; 2636 uint16_t port_id; 2637 2638 switch (internals->mode) { 2639 /* Promiscuous mode is propagated to all slaves */ 2640 case BONDING_MODE_ROUND_ROBIN: 2641 case BONDING_MODE_BALANCE: 2642 case BONDING_MODE_BROADCAST: 2643 case BONDING_MODE_8023AD: { 2644 unsigned int slave_ok = 0; 2645 2646 for (i = 0; i < internals->slave_count; i++) { 2647 port_id = internals->slaves[i].port_id; 2648 2649 if (internals->mode == BONDING_MODE_8023AD && 2650 bond_mode_8023ad_ports[port_id].forced_rx_flags == 2651 BOND_8023AD_FORCED_PROMISC) { 2652 slave_ok++; 2653 continue; 2654 } 2655 ret = rte_eth_promiscuous_disable(port_id); 2656 if (ret != 0) 2657 RTE_BOND_LOG(ERR, 2658 "Failed to disable promiscuous mode for port %u: %s", 2659 port_id, rte_strerror(-ret)); 2660 else 2661 slave_ok++; 2662 } 2663 /* 2664 * Report success if operation is successful on at least 2665 * on one slave. Otherwise return last error code. 
2666 */ 2667 if (slave_ok > 0) 2668 ret = 0; 2669 break; 2670 } 2671 /* Promiscuous mode is propagated only to primary slave */ 2672 case BONDING_MODE_ACTIVE_BACKUP: 2673 case BONDING_MODE_TLB: 2674 case BONDING_MODE_ALB: 2675 default: 2676 /* Do not touch promisc when there cannot be primary ports */ 2677 if (internals->slave_count == 0) 2678 break; 2679 port_id = internals->current_primary_port; 2680 ret = rte_eth_promiscuous_disable(port_id); 2681 if (ret != 0) 2682 RTE_BOND_LOG(ERR, 2683 "Failed to disable promiscuous mode for port %u: %s", 2684 port_id, rte_strerror(-ret)); 2685 } 2686 2687 return ret; 2688 } 2689 2690 static int 2691 bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev) 2692 { 2693 struct bond_dev_private *internals = eth_dev->data->dev_private; 2694 int i; 2695 int ret = 0; 2696 uint16_t port_id; 2697 2698 switch (internals->mode) { 2699 /* allmulti mode is propagated to all slaves */ 2700 case BONDING_MODE_ROUND_ROBIN: 2701 case BONDING_MODE_BALANCE: 2702 case BONDING_MODE_BROADCAST: 2703 case BONDING_MODE_8023AD: { 2704 unsigned int slave_ok = 0; 2705 2706 for (i = 0; i < internals->slave_count; i++) { 2707 port_id = internals->slaves[i].port_id; 2708 2709 ret = rte_eth_allmulticast_enable(port_id); 2710 if (ret != 0) 2711 RTE_BOND_LOG(ERR, 2712 "Failed to enable allmulti mode for port %u: %s", 2713 port_id, rte_strerror(-ret)); 2714 else 2715 slave_ok++; 2716 } 2717 /* 2718 * Report success if operation is successful on at least 2719 * on one slave. Otherwise return last error code. 2720 */ 2721 if (slave_ok > 0) 2722 ret = 0; 2723 break; 2724 } 2725 /* allmulti mode is propagated only to primary slave */ 2726 case BONDING_MODE_ACTIVE_BACKUP: 2727 case BONDING_MODE_TLB: 2728 case BONDING_MODE_ALB: 2729 default: 2730 /* Do not touch allmulti when there cannot be primary ports */ 2731 if (internals->slave_count == 0) 2732 break; 2733 port_id = internals->current_primary_port; 2734 ret = rte_eth_allmulticast_enable(port_id); 2735 if (ret != 0) 2736 RTE_BOND_LOG(ERR, 2737 "Failed to enable allmulti mode for port %u: %s", 2738 port_id, rte_strerror(-ret)); 2739 } 2740 2741 return ret; 2742 } 2743 2744 static int 2745 bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev) 2746 { 2747 struct bond_dev_private *internals = eth_dev->data->dev_private; 2748 int i; 2749 int ret = 0; 2750 uint16_t port_id; 2751 2752 switch (internals->mode) { 2753 /* allmulti mode is propagated to all slaves */ 2754 case BONDING_MODE_ROUND_ROBIN: 2755 case BONDING_MODE_BALANCE: 2756 case BONDING_MODE_BROADCAST: 2757 case BONDING_MODE_8023AD: { 2758 unsigned int slave_ok = 0; 2759 2760 for (i = 0; i < internals->slave_count; i++) { 2761 uint16_t port_id = internals->slaves[i].port_id; 2762 2763 if (internals->mode == BONDING_MODE_8023AD && 2764 bond_mode_8023ad_ports[port_id].forced_rx_flags == 2765 BOND_8023AD_FORCED_ALLMULTI) 2766 continue; 2767 2768 ret = rte_eth_allmulticast_disable(port_id); 2769 if (ret != 0) 2770 RTE_BOND_LOG(ERR, 2771 "Failed to disable allmulti mode for port %u: %s", 2772 port_id, rte_strerror(-ret)); 2773 else 2774 slave_ok++; 2775 } 2776 /* 2777 * Report success if operation is successful on at least 2778 * on one slave. Otherwise return last error code. 
2779 */ 2780 if (slave_ok > 0) 2781 ret = 0; 2782 break; 2783 } 2784 /* allmulti mode is propagated only to primary slave */ 2785 case BONDING_MODE_ACTIVE_BACKUP: 2786 case BONDING_MODE_TLB: 2787 case BONDING_MODE_ALB: 2788 default: 2789 /* Do not touch allmulti when there cannot be primary ports */ 2790 if (internals->slave_count == 0) 2791 break; 2792 port_id = internals->current_primary_port; 2793 ret = rte_eth_allmulticast_disable(port_id); 2794 if (ret != 0) 2795 RTE_BOND_LOG(ERR, 2796 "Failed to disable allmulti mode for port %u: %s", 2797 port_id, rte_strerror(-ret)); 2798 } 2799 2800 return ret; 2801 } 2802 2803 static void 2804 bond_ethdev_delayed_lsc_propagation(void *arg) 2805 { 2806 if (arg == NULL) 2807 return; 2808 2809 rte_eth_dev_callback_process((struct rte_eth_dev *)arg, 2810 RTE_ETH_EVENT_INTR_LSC, NULL); 2811 } 2812 2813 int 2814 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type, 2815 void *param, void *ret_param __rte_unused) 2816 { 2817 struct rte_eth_dev *bonded_eth_dev; 2818 struct bond_dev_private *internals; 2819 struct rte_eth_link link; 2820 int rc = -1; 2821 int ret; 2822 2823 uint8_t lsc_flag = 0; 2824 int valid_slave = 0; 2825 uint16_t active_pos; 2826 uint16_t i; 2827 2828 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL) 2829 return rc; 2830 2831 bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param]; 2832 2833 if (check_for_bonded_ethdev(bonded_eth_dev)) 2834 return rc; 2835 2836 internals = bonded_eth_dev->data->dev_private; 2837 2838 /* If the device isn't started don't handle interrupts */ 2839 if (!bonded_eth_dev->data->dev_started) 2840 return rc; 2841 2842 /* verify that port_id is a valid slave of bonded port */ 2843 for (i = 0; i < internals->slave_count; i++) { 2844 if (internals->slaves[i].port_id == port_id) { 2845 valid_slave = 1; 2846 break; 2847 } 2848 } 2849 2850 if (!valid_slave) 2851 return rc; 2852 2853 /* Synchronize lsc callback parallel calls either by real link event 2854 * from the slaves PMDs or by the bonding PMD itself. 2855 */ 2856 rte_spinlock_lock(&internals->lsc_lock); 2857 2858 /* Search for port in active port list */ 2859 active_pos = find_slave_by_id(internals->active_slaves, 2860 internals->active_slave_count, port_id); 2861 2862 ret = rte_eth_link_get_nowait(port_id, &link); 2863 if (ret < 0) 2864 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id); 2865 2866 if (ret == 0 && link.link_status) { 2867 if (active_pos < internals->active_slave_count) 2868 goto link_update; 2869 2870 /* check link state properties if bonded link is up*/ 2871 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) { 2872 if (link_properties_valid(bonded_eth_dev, &link) != 0) 2873 RTE_BOND_LOG(ERR, "Invalid link properties " 2874 "for slave %d in bonding mode %d", 2875 port_id, internals->mode); 2876 } else { 2877 /* inherit slave link properties */ 2878 link_properties_set(bonded_eth_dev, &link); 2879 } 2880 2881 /* If no active slave ports then set this port to be 2882 * the primary port. 2883 */ 2884 if (internals->active_slave_count < 1) { 2885 /* If first active slave, then change link status */ 2886 bonded_eth_dev->data->dev_link.link_status = 2887 ETH_LINK_UP; 2888 internals->current_primary_port = port_id; 2889 lsc_flag = 1; 2890 2891 mac_address_slaves_update(bonded_eth_dev); 2892 } 2893 2894 activate_slave(bonded_eth_dev, port_id); 2895 2896 /* If the user has defined the primary port then default to 2897 * using it. 
2898 */ 2899 if (internals->user_defined_primary_port && 2900 internals->primary_port == port_id) 2901 bond_ethdev_primary_set(internals, port_id); 2902 } else { 2903 if (active_pos == internals->active_slave_count) 2904 goto link_update; 2905 2906 /* Remove from active slave list */ 2907 deactivate_slave(bonded_eth_dev, port_id); 2908 2909 if (internals->active_slave_count < 1) 2910 lsc_flag = 1; 2911 2912 /* Update primary id, take first active slave from list or if none 2913 * available set to -1 */ 2914 if (port_id == internals->current_primary_port) { 2915 if (internals->active_slave_count > 0) 2916 bond_ethdev_primary_set(internals, 2917 internals->active_slaves[0]); 2918 else 2919 internals->current_primary_port = internals->primary_port; 2920 mac_address_slaves_update(bonded_eth_dev); 2921 } 2922 } 2923 2924 link_update: 2925 /** 2926 * Update bonded device link properties after any change to active 2927 * slaves 2928 */ 2929 bond_ethdev_link_update(bonded_eth_dev, 0); 2930 2931 if (lsc_flag) { 2932 /* Cancel any possible outstanding interrupts if delays are enabled */ 2933 if (internals->link_up_delay_ms > 0 || 2934 internals->link_down_delay_ms > 0) 2935 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation, 2936 bonded_eth_dev); 2937 2938 if (bonded_eth_dev->data->dev_link.link_status) { 2939 if (internals->link_up_delay_ms > 0) 2940 rte_eal_alarm_set(internals->link_up_delay_ms * 1000, 2941 bond_ethdev_delayed_lsc_propagation, 2942 (void *)bonded_eth_dev); 2943 else 2944 rte_eth_dev_callback_process(bonded_eth_dev, 2945 RTE_ETH_EVENT_INTR_LSC, 2946 NULL); 2947 2948 } else { 2949 if (internals->link_down_delay_ms > 0) 2950 rte_eal_alarm_set(internals->link_down_delay_ms * 1000, 2951 bond_ethdev_delayed_lsc_propagation, 2952 (void *)bonded_eth_dev); 2953 else 2954 rte_eth_dev_callback_process(bonded_eth_dev, 2955 RTE_ETH_EVENT_INTR_LSC, 2956 NULL); 2957 } 2958 } 2959 2960 rte_spinlock_unlock(&internals->lsc_lock); 2961 2962 return rc; 2963 } 2964 2965 static int 2966 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev, 2967 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) 2968 { 2969 unsigned i, j; 2970 int result = 0; 2971 int slave_reta_size; 2972 unsigned reta_count; 2973 struct bond_dev_private *internals = dev->data->dev_private; 2974 2975 if (reta_size != internals->reta_size) 2976 return -EINVAL; 2977 2978 /* Copy RETA table */ 2979 reta_count = (reta_size + RTE_RETA_GROUP_SIZE - 1) / 2980 RTE_RETA_GROUP_SIZE; 2981 2982 for (i = 0; i < reta_count; i++) { 2983 internals->reta_conf[i].mask = reta_conf[i].mask; 2984 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) 2985 if ((reta_conf[i].mask >> j) & 0x01) 2986 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j]; 2987 } 2988 2989 /* Fill rest of array */ 2990 for (; i < RTE_DIM(internals->reta_conf); i += reta_count) 2991 memcpy(&internals->reta_conf[i], &internals->reta_conf[0], 2992 sizeof(internals->reta_conf[0]) * reta_count); 2993 2994 /* Propagate RETA over slaves */ 2995 for (i = 0; i < internals->slave_count; i++) { 2996 slave_reta_size = internals->slaves[i].reta_size; 2997 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id, 2998 &internals->reta_conf[0], slave_reta_size); 2999 if (result < 0) 3000 return result; 3001 } 3002 3003 return 0; 3004 } 3005 3006 static int 3007 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev, 3008 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) 3009 { 3010 int i, j; 3011 struct bond_dev_private *internals = dev->data->dev_private; 3012 3013 if 
(reta_size != internals->reta_size) 3014 return -EINVAL; 3015 3016 /* Copy RETA table */ 3017 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++) 3018 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) 3019 if ((reta_conf[i].mask >> j) & 0x01) 3020 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j]; 3021 3022 return 0; 3023 } 3024 3025 static int 3026 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev, 3027 struct rte_eth_rss_conf *rss_conf) 3028 { 3029 int i, result = 0; 3030 struct bond_dev_private *internals = dev->data->dev_private; 3031 struct rte_eth_rss_conf bond_rss_conf; 3032 3033 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf)); 3034 3035 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads; 3036 3037 if (bond_rss_conf.rss_hf != 0) 3038 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf; 3039 3040 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len < 3041 sizeof(internals->rss_key)) { 3042 if (bond_rss_conf.rss_key_len == 0) 3043 bond_rss_conf.rss_key_len = 40; 3044 internals->rss_key_len = bond_rss_conf.rss_key_len; 3045 memcpy(internals->rss_key, bond_rss_conf.rss_key, 3046 internals->rss_key_len); 3047 } 3048 3049 for (i = 0; i < internals->slave_count; i++) { 3050 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id, 3051 &bond_rss_conf); 3052 if (result < 0) 3053 return result; 3054 } 3055 3056 return 0; 3057 } 3058 3059 static int 3060 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev, 3061 struct rte_eth_rss_conf *rss_conf) 3062 { 3063 struct bond_dev_private *internals = dev->data->dev_private; 3064 3065 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 3066 rss_conf->rss_key_len = internals->rss_key_len; 3067 if (rss_conf->rss_key) 3068 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len); 3069 3070 return 0; 3071 } 3072 3073 static int 3074 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 3075 { 3076 struct rte_eth_dev *slave_eth_dev; 3077 struct bond_dev_private *internals = dev->data->dev_private; 3078 int ret, i; 3079 3080 rte_spinlock_lock(&internals->lock); 3081 3082 for (i = 0; i < internals->slave_count; i++) { 3083 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id]; 3084 if (*slave_eth_dev->dev_ops->mtu_set == NULL) { 3085 rte_spinlock_unlock(&internals->lock); 3086 return -ENOTSUP; 3087 } 3088 } 3089 for (i = 0; i < internals->slave_count; i++) { 3090 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu); 3091 if (ret < 0) { 3092 rte_spinlock_unlock(&internals->lock); 3093 return ret; 3094 } 3095 } 3096 3097 rte_spinlock_unlock(&internals->lock); 3098 return 0; 3099 } 3100 3101 static int 3102 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, 3103 struct rte_ether_addr *addr) 3104 { 3105 if (mac_address_set(dev, addr)) { 3106 RTE_BOND_LOG(ERR, "Failed to update MAC address"); 3107 return -EINVAL; 3108 } 3109 3110 return 0; 3111 } 3112 3113 static int 3114 bond_flow_ops_get(struct rte_eth_dev *dev __rte_unused, 3115 const struct rte_flow_ops **ops) 3116 { 3117 *ops = &bond_flow_ops; 3118 return 0; 3119 } 3120 3121 static int 3122 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, 3123 struct rte_ether_addr *mac_addr, 3124 __rte_unused uint32_t index, uint32_t vmdq) 3125 { 3126 struct rte_eth_dev *slave_eth_dev; 3127 struct bond_dev_private *internals = dev->data->dev_private; 3128 int ret, i; 3129 3130 rte_spinlock_lock(&internals->lock); 3131 3132 for (i = 0; i < internals->slave_count; i++) { 3133 slave_eth_dev = 
&rte_eth_devices[internals->slaves[i].port_id]; 3134 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL || 3135 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) { 3136 ret = -ENOTSUP; 3137 goto end; 3138 } 3139 } 3140 3141 for (i = 0; i < internals->slave_count; i++) { 3142 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id, 3143 mac_addr, vmdq); 3144 if (ret < 0) { 3145 /* rollback */ 3146 for (i--; i >= 0; i--) 3147 rte_eth_dev_mac_addr_remove( 3148 internals->slaves[i].port_id, mac_addr); 3149 goto end; 3150 } 3151 } 3152 3153 ret = 0; 3154 end: 3155 rte_spinlock_unlock(&internals->lock); 3156 return ret; 3157 } 3158 3159 static void 3160 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index) 3161 { 3162 struct rte_eth_dev *slave_eth_dev; 3163 struct bond_dev_private *internals = dev->data->dev_private; 3164 int i; 3165 3166 rte_spinlock_lock(&internals->lock); 3167 3168 for (i = 0; i < internals->slave_count; i++) { 3169 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id]; 3170 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL) 3171 goto end; 3172 } 3173 3174 struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index]; 3175 3176 for (i = 0; i < internals->slave_count; i++) 3177 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id, 3178 mac_addr); 3179 3180 end: 3181 rte_spinlock_unlock(&internals->lock); 3182 } 3183 3184 const struct eth_dev_ops default_dev_ops = { 3185 .dev_start = bond_ethdev_start, 3186 .dev_stop = bond_ethdev_stop, 3187 .dev_close = bond_ethdev_close, 3188 .dev_configure = bond_ethdev_configure, 3189 .dev_infos_get = bond_ethdev_info, 3190 .vlan_filter_set = bond_ethdev_vlan_filter_set, 3191 .rx_queue_setup = bond_ethdev_rx_queue_setup, 3192 .tx_queue_setup = bond_ethdev_tx_queue_setup, 3193 .rx_queue_release = bond_ethdev_rx_queue_release, 3194 .tx_queue_release = bond_ethdev_tx_queue_release, 3195 .link_update = bond_ethdev_link_update, 3196 .stats_get = bond_ethdev_stats_get, 3197 .stats_reset = bond_ethdev_stats_reset, 3198 .promiscuous_enable = bond_ethdev_promiscuous_enable, 3199 .promiscuous_disable = bond_ethdev_promiscuous_disable, 3200 .allmulticast_enable = bond_ethdev_allmulticast_enable, 3201 .allmulticast_disable = bond_ethdev_allmulticast_disable, 3202 .reta_update = bond_ethdev_rss_reta_update, 3203 .reta_query = bond_ethdev_rss_reta_query, 3204 .rss_hash_update = bond_ethdev_rss_hash_update, 3205 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get, 3206 .mtu_set = bond_ethdev_mtu_set, 3207 .mac_addr_set = bond_ethdev_mac_address_set, 3208 .mac_addr_add = bond_ethdev_mac_addr_add, 3209 .mac_addr_remove = bond_ethdev_mac_addr_remove, 3210 .flow_ops_get = bond_flow_ops_get 3211 }; 3212 3213 static int 3214 bond_alloc(struct rte_vdev_device *dev, uint8_t mode) 3215 { 3216 const char *name = rte_vdev_device_name(dev); 3217 uint8_t socket_id = dev->device.numa_node; 3218 struct bond_dev_private *internals = NULL; 3219 struct rte_eth_dev *eth_dev = NULL; 3220 uint32_t vlan_filter_bmp_size; 3221 3222 /* now do all data allocation - for eth_dev structure, dummy pci driver 3223 * and internal (private) data 3224 */ 3225 3226 /* reserve an ethdev entry */ 3227 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals)); 3228 if (eth_dev == NULL) { 3229 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev"); 3230 goto err; 3231 } 3232 3233 internals = eth_dev->data->dev_private; 3234 eth_dev->data->nb_rx_queues = (uint16_t)1; 3235 eth_dev->data->nb_tx_queues = (uint16_t)1; 3236 3237 /* Allocate memory for storing MAC 
addresses */ 3238 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN * 3239 BOND_MAX_MAC_ADDRS, 0, socket_id); 3240 if (eth_dev->data->mac_addrs == NULL) { 3241 RTE_BOND_LOG(ERR, 3242 "Failed to allocate %u bytes needed to store MAC addresses", 3243 RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS); 3244 goto err; 3245 } 3246 3247 eth_dev->dev_ops = &default_dev_ops; 3248 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC | 3249 RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 3250 3251 rte_spinlock_init(&internals->lock); 3252 rte_spinlock_init(&internals->lsc_lock); 3253 3254 internals->port_id = eth_dev->data->port_id; 3255 internals->mode = BONDING_MODE_INVALID; 3256 internals->current_primary_port = RTE_MAX_ETHPORTS + 1; 3257 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; 3258 internals->burst_xmit_hash = burst_xmit_l2_hash; 3259 internals->user_defined_mac = 0; 3260 3261 internals->link_status_polling_enabled = 0; 3262 3263 internals->link_status_polling_interval_ms = 3264 DEFAULT_POLLING_INTERVAL_10_MS; 3265 internals->link_down_delay_ms = 0; 3266 internals->link_up_delay_ms = 0; 3267 3268 internals->slave_count = 0; 3269 internals->active_slave_count = 0; 3270 internals->rx_offload_capa = 0; 3271 internals->tx_offload_capa = 0; 3272 internals->rx_queue_offload_capa = 0; 3273 internals->tx_queue_offload_capa = 0; 3274 internals->candidate_max_rx_pktlen = 0; 3275 internals->max_rx_pktlen = 0; 3276 3277 /* Initially allow to choose any offload type */ 3278 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK; 3279 3280 memset(&internals->default_rxconf, 0, 3281 sizeof(internals->default_rxconf)); 3282 memset(&internals->default_txconf, 0, 3283 sizeof(internals->default_txconf)); 3284 3285 memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim)); 3286 memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim)); 3287 3288 memset(internals->active_slaves, 0, sizeof(internals->active_slaves)); 3289 memset(internals->slaves, 0, sizeof(internals->slaves)); 3290 3291 TAILQ_INIT(&internals->flow_list); 3292 internals->flow_isolated_valid = 0; 3293 3294 /* Set mode 4 default configuration */ 3295 bond_mode_8023ad_setup(eth_dev, NULL); 3296 if (bond_ethdev_mode_set(eth_dev, mode)) { 3297 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d", 3298 eth_dev->data->port_id, mode); 3299 goto err; 3300 } 3301 3302 vlan_filter_bmp_size = 3303 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1); 3304 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size, 3305 RTE_CACHE_LINE_SIZE); 3306 if (internals->vlan_filter_bmpmem == NULL) { 3307 RTE_BOND_LOG(ERR, 3308 "Failed to allocate vlan bitmap for bonded device %u", 3309 eth_dev->data->port_id); 3310 goto err; 3311 } 3312 3313 internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1, 3314 internals->vlan_filter_bmpmem, vlan_filter_bmp_size); 3315 if (internals->vlan_filter_bmp == NULL) { 3316 RTE_BOND_LOG(ERR, 3317 "Failed to init vlan bitmap for bonded device %u", 3318 eth_dev->data->port_id); 3319 rte_free(internals->vlan_filter_bmpmem); 3320 goto err; 3321 } 3322 3323 return eth_dev->data->port_id; 3324 3325 err: 3326 rte_free(internals); 3327 if (eth_dev != NULL) 3328 eth_dev->data->dev_private = NULL; 3329 rte_eth_dev_release_port(eth_dev); 3330 return -1; 3331 } 3332 3333 static int 3334 bond_probe(struct rte_vdev_device *dev) 3335 { 3336 const char *name; 3337 struct bond_dev_private *internals; 3338 struct rte_kvargs *kvlist; 3339 uint8_t bonding_mode; 3340 int arg_count, 
port_id; 3341 int socket_id; 3342 uint8_t agg_mode; 3343 struct rte_eth_dev *eth_dev; 3344 3345 if (!dev) 3346 return -EINVAL; 3347 3348 name = rte_vdev_device_name(dev); 3349 RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name); 3350 3351 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 3352 eth_dev = rte_eth_dev_attach_secondary(name); 3353 if (!eth_dev) { 3354 RTE_BOND_LOG(ERR, "Failed to probe %s", name); 3355 return -1; 3356 } 3357 /* TODO: request info from primary to set up Rx and Tx */ 3358 eth_dev->dev_ops = &default_dev_ops; 3359 eth_dev->device = &dev->device; 3360 rte_eth_dev_probing_finish(eth_dev); 3361 return 0; 3362 } 3363 3364 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), 3365 pmd_bond_init_valid_arguments); 3366 if (kvlist == NULL) 3367 return -1; 3368 3369 /* Parse link bonding mode */ 3370 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) { 3371 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG, 3372 &bond_ethdev_parse_slave_mode_kvarg, 3373 &bonding_mode) != 0) { 3374 RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s", 3375 name); 3376 goto parse_error; 3377 } 3378 } else { 3379 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded " 3380 "device %s", name); 3381 goto parse_error; 3382 } 3383 3384 /* Parse socket id to create bonding device on */ 3385 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG); 3386 if (arg_count == 1) { 3387 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG, 3388 &bond_ethdev_parse_socket_id_kvarg, &socket_id) 3389 != 0) { 3390 RTE_BOND_LOG(ERR, "Invalid socket Id specified for " 3391 "bonded device %s", name); 3392 goto parse_error; 3393 } 3394 } else if (arg_count > 1) { 3395 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for " 3396 "bonded device %s", name); 3397 goto parse_error; 3398 } else { 3399 socket_id = rte_socket_id(); 3400 } 3401 3402 dev->device.numa_node = socket_id; 3403 3404 /* Create link bonding eth device */ 3405 port_id = bond_alloc(dev, bonding_mode); 3406 if (port_id < 0) { 3407 RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on " 3408 "socket %u.", name, bonding_mode, socket_id); 3409 goto parse_error; 3410 } 3411 internals = rte_eth_devices[port_id].data->dev_private; 3412 internals->kvlist = kvlist; 3413 3414 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) { 3415 if (rte_kvargs_process(kvlist, 3416 PMD_BOND_AGG_MODE_KVARG, 3417 &bond_ethdev_parse_slave_agg_mode_kvarg, 3418 &agg_mode) != 0) { 3419 RTE_BOND_LOG(ERR, 3420 "Failed to parse agg selection mode for bonded device %s", 3421 name); 3422 goto parse_error; 3423 } 3424 3425 if (internals->mode == BONDING_MODE_8023AD) 3426 internals->mode4.agg_selection = agg_mode; 3427 } else { 3428 internals->mode4.agg_selection = AGG_STABLE; 3429 } 3430 3431 rte_eth_dev_probing_finish(&rte_eth_devices[port_id]); 3432 RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on " 3433 "socket %u.", name, port_id, bonding_mode, socket_id); 3434 return 0; 3435 3436 parse_error: 3437 rte_kvargs_free(kvlist); 3438 3439 return -1; 3440 } 3441 3442 static int 3443 bond_remove(struct rte_vdev_device *dev) 3444 { 3445 struct rte_eth_dev *eth_dev; 3446 struct bond_dev_private *internals; 3447 const char *name; 3448 int ret = 0; 3449 3450 if (!dev) 3451 return -EINVAL; 3452 3453 name = rte_vdev_device_name(dev); 3454 RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name); 3455 3456 /* find an ethdev entry */ 3457 eth_dev = rte_eth_dev_allocated(name); 3458 if (eth_dev == NULL) 3459 return 0; /* 
port already released */ 3460 3461 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 3462 return rte_eth_dev_release_port(eth_dev); 3463 3464 RTE_ASSERT(eth_dev->device == &dev->device); 3465 3466 internals = eth_dev->data->dev_private; 3467 if (internals->slave_count != 0) 3468 return -EBUSY; 3469 3470 if (eth_dev->data->dev_started == 1) { 3471 ret = bond_ethdev_stop(eth_dev); 3472 bond_ethdev_close(eth_dev); 3473 } 3474 if (internals->kvlist != NULL) 3475 rte_kvargs_free(internals->kvlist); 3476 rte_eth_dev_release_port(eth_dev); 3477 3478 return ret; 3479 } 3480 3481 /* this part will resolve the slave portids after all the other pdev and vdev 3482 * have been allocated */ 3483 static int 3484 bond_ethdev_configure(struct rte_eth_dev *dev) 3485 { 3486 const char *name = dev->device->name; 3487 struct bond_dev_private *internals = dev->data->dev_private; 3488 struct rte_kvargs *kvlist = internals->kvlist; 3489 int arg_count; 3490 uint16_t port_id = dev - rte_eth_devices; 3491 uint8_t agg_mode; 3492 3493 static const uint8_t default_rss_key[40] = { 3494 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D, 3495 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4, 3496 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B, 3497 0xBE, 0xAC, 0x01, 0xFA 3498 }; 3499 3500 unsigned i, j; 3501 3502 /* 3503 * If RSS is enabled, fill table with default values and 3504 * set key to the the value specified in port RSS configuration. 3505 * Fall back to default RSS key if the key is not specified 3506 */ 3507 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) { 3508 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) { 3509 internals->rss_key_len = 3510 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len; 3511 memcpy(internals->rss_key, 3512 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key, 3513 internals->rss_key_len); 3514 } else { 3515 internals->rss_key_len = sizeof(default_rss_key); 3516 memcpy(internals->rss_key, default_rss_key, 3517 internals->rss_key_len); 3518 } 3519 3520 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) { 3521 internals->reta_conf[i].mask = ~0LL; 3522 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) 3523 internals->reta_conf[i].reta[j] = 3524 (i * RTE_RETA_GROUP_SIZE + j) % 3525 dev->data->nb_rx_queues; 3526 } 3527 } 3528 3529 /* set the max_rx_pktlen */ 3530 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen; 3531 3532 /* 3533 * if no kvlist, it means that this bonded device has been created 3534 * through the bonding api. 
3535 */ 3536 if (!kvlist) 3537 return 0; 3538 3539 /* Parse MAC address for bonded device */ 3540 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG); 3541 if (arg_count == 1) { 3542 struct rte_ether_addr bond_mac; 3543 3544 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG, 3545 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) { 3546 RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s", 3547 name); 3548 return -1; 3549 } 3550 3551 /* Set MAC address */ 3552 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) { 3553 RTE_BOND_LOG(ERR, 3554 "Failed to set mac address on bonded device %s", 3555 name); 3556 return -1; 3557 } 3558 } else if (arg_count > 1) { 3559 RTE_BOND_LOG(ERR, 3560 "MAC address can be specified only once for bonded device %s", 3561 name); 3562 return -1; 3563 } 3564 3565 /* Parse/set balance mode transmit policy */ 3566 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG); 3567 if (arg_count == 1) { 3568 uint8_t xmit_policy; 3569 3570 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG, 3571 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) != 3572 0) { 3573 RTE_BOND_LOG(INFO, 3574 "Invalid xmit policy specified for bonded device %s", 3575 name); 3576 return -1; 3577 } 3578 3579 /* Set balance mode transmit policy*/ 3580 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) { 3581 RTE_BOND_LOG(ERR, 3582 "Failed to set balance xmit policy on bonded device %s", 3583 name); 3584 return -1; 3585 } 3586 } else if (arg_count > 1) { 3587 RTE_BOND_LOG(ERR, 3588 "Transmit policy can be specified only once for bonded device %s", 3589 name); 3590 return -1; 3591 } 3592 3593 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) { 3594 if (rte_kvargs_process(kvlist, 3595 PMD_BOND_AGG_MODE_KVARG, 3596 &bond_ethdev_parse_slave_agg_mode_kvarg, 3597 &agg_mode) != 0) { 3598 RTE_BOND_LOG(ERR, 3599 "Failed to parse agg selection mode for bonded device %s", 3600 name); 3601 } 3602 if (internals->mode == BONDING_MODE_8023AD) { 3603 int ret = rte_eth_bond_8023ad_agg_selection_set(port_id, 3604 agg_mode); 3605 if (ret < 0) { 3606 RTE_BOND_LOG(ERR, 3607 "Invalid args for agg selection set for bonded device %s", 3608 name); 3609 return -1; 3610 } 3611 } 3612 } 3613 3614 /* Parse/add slave ports to bonded device */ 3615 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) { 3616 struct bond_ethdev_slave_ports slave_ports; 3617 unsigned i; 3618 3619 memset(&slave_ports, 0, sizeof(slave_ports)); 3620 3621 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG, 3622 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) { 3623 RTE_BOND_LOG(ERR, 3624 "Failed to parse slave ports for bonded device %s", 3625 name); 3626 return -1; 3627 } 3628 3629 for (i = 0; i < slave_ports.slave_count; i++) { 3630 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) { 3631 RTE_BOND_LOG(ERR, 3632 "Failed to add port %d as slave to bonded device %s", 3633 slave_ports.slaves[i], name); 3634 } 3635 } 3636 3637 } else { 3638 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name); 3639 return -1; 3640 } 3641 3642 /* Parse/set primary slave port id*/ 3643 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG); 3644 if (arg_count == 1) { 3645 uint16_t primary_slave_port_id; 3646 3647 if (rte_kvargs_process(kvlist, 3648 PMD_BOND_PRIMARY_SLAVE_KVARG, 3649 &bond_ethdev_parse_primary_slave_port_id_kvarg, 3650 &primary_slave_port_id) < 0) { 3651 RTE_BOND_LOG(INFO, 3652 "Invalid primary slave port id 
specified for bonded device %s", 3653 name); 3654 return -1; 3655 } 3656 3657 /* Set balance mode transmit policy*/ 3658 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id) 3659 != 0) { 3660 RTE_BOND_LOG(ERR, 3661 "Failed to set primary slave port %d on bonded device %s", 3662 primary_slave_port_id, name); 3663 return -1; 3664 } 3665 } else if (arg_count > 1) { 3666 RTE_BOND_LOG(INFO, 3667 "Primary slave can be specified only once for bonded device %s", 3668 name); 3669 return -1; 3670 } 3671 3672 /* Parse link status monitor polling interval */ 3673 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG); 3674 if (arg_count == 1) { 3675 uint32_t lsc_poll_interval_ms; 3676 3677 if (rte_kvargs_process(kvlist, 3678 PMD_BOND_LSC_POLL_PERIOD_KVARG, 3679 &bond_ethdev_parse_time_ms_kvarg, 3680 &lsc_poll_interval_ms) < 0) { 3681 RTE_BOND_LOG(INFO, 3682 "Invalid lsc polling interval value specified for bonded" 3683 " device %s", name); 3684 return -1; 3685 } 3686 3687 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms) 3688 != 0) { 3689 RTE_BOND_LOG(ERR, 3690 "Failed to set lsc monitor polling interval (%u ms) on bonded device %s", 3691 lsc_poll_interval_ms, name); 3692 return -1; 3693 } 3694 } else if (arg_count > 1) { 3695 RTE_BOND_LOG(INFO, 3696 "LSC polling interval can be specified only once for bonded" 3697 " device %s", name); 3698 return -1; 3699 } 3700 3701 /* Parse link up interrupt propagation delay */ 3702 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG); 3703 if (arg_count == 1) { 3704 uint32_t link_up_delay_ms; 3705 3706 if (rte_kvargs_process(kvlist, 3707 PMD_BOND_LINK_UP_PROP_DELAY_KVARG, 3708 &bond_ethdev_parse_time_ms_kvarg, 3709 &link_up_delay_ms) < 0) { 3710 RTE_BOND_LOG(INFO, 3711 "Invalid link up propagation delay value specified for" 3712 " bonded device %s", name); 3713 return -1; 3714 } 3715 3716 /* Set balance mode transmit policy*/ 3717 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms) 3718 != 0) { 3719 RTE_BOND_LOG(ERR, 3720 "Failed to set link up propagation delay (%u ms) on bonded" 3721 " device %s", link_up_delay_ms, name); 3722 return -1; 3723 } 3724 } else if (arg_count > 1) { 3725 RTE_BOND_LOG(INFO, 3726 "Link up propagation delay can be specified only once for" 3727 " bonded device %s", name); 3728 return -1; 3729 } 3730 3731 /* Parse link down interrupt propagation delay */ 3732 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG); 3733 if (arg_count == 1) { 3734 uint32_t link_down_delay_ms; 3735 3736 if (rte_kvargs_process(kvlist, 3737 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG, 3738 &bond_ethdev_parse_time_ms_kvarg, 3739 &link_down_delay_ms) < 0) { 3740 RTE_BOND_LOG(INFO, 3741 "Invalid link down propagation delay value specified for" 3742 " bonded device %s", name); 3743 return -1; 3744 } 3745 3746 /* Set balance mode transmit policy*/ 3747 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms) 3748 != 0) { 3749 RTE_BOND_LOG(ERR, 3750 "Failed to set link down propagation delay (%u ms) on bonded device %s", 3751 link_down_delay_ms, name); 3752 return -1; 3753 } 3754 } else if (arg_count > 1) { 3755 RTE_BOND_LOG(INFO, 3756 "Link down propagation delay can be specified only once for bonded device %s", 3757 name); 3758 return -1; 3759 } 3760 3761 return 0; 3762 } 3763 3764 struct rte_vdev_driver pmd_bond_drv = { 3765 .probe = bond_probe, 3766 .remove = bond_remove, 3767 }; 3768 3769 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv); 3770 
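/* Keep the historical "eth_bond" vdev name below as an alias of net_bonding
 * so existing command lines and scripts keep working.
 */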
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond); 3771 3772 RTE_PMD_REGISTER_PARAM_STRING(net_bonding, 3773 "slave=<ifc> " 3774 "primary=<ifc> " 3775 "mode=[0-6] " 3776 "xmit_policy=[l2 | l23 | l34] " 3777 "agg_mode=[count | stable | bandwidth] " 3778 "socket_id=<int> " 3779 "mac=<mac addr> " 3780 "lsc_poll_period_ms=<int> " 3781 "up_delay=<int> " 3782 "down_delay=<int>"); 3783 3784 /* We can't use RTE_LOG_REGISTER_DEFAULT because of the forced name for 3785 * this library, see meson.build. 3786 */ 3787 RTE_LOG_REGISTER(bond_logtype, pmd.net.bonding, NOTICE); 3788
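/*
 * Illustrative sketch, not part of the driver: the devargs registered above
 * (e.g. --vdev 'net_bonding0,mode=4,slave=0000:0a:00.0,slave=0000:0a:00.1')
 * are consumed by bond_probe() and bond_ethdev_configure(). An application
 * can reach the same state through the public rte_eth_bond API instead; the
 * port ids, device name and helper below are hypothetical.
 */
#if 0
static int
example_create_bonded_port(void)
{
	int bond_port;

	/* Create a mode 4 (802.3ad/LACP) bonded vdev on the caller's socket. */
	bond_port = rte_eth_bond_create("net_bonding0", BONDING_MODE_8023AD,
			rte_socket_id());
	if (bond_port < 0)
		return bond_port;

	/* Attach two already probed slave ports (ids 1 and 2 here). */
	if (rte_eth_bond_slave_add(bond_port, 1) != 0 ||
			rte_eth_bond_slave_add(bond_port, 2) != 0)
		return -1;

	/* Optional: choose the primary used by active-backup/TLB/ALB modes. */
	rte_eth_bond_primary_set(bond_port, 1);

	/* The application still configures, sets up queues and starts
	 * bond_port as a normal ethdev; bond_ethdev_start() then reconfigures
	 * and starts each slave.
	 */
	return bond_port;
}
#endif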