1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2017 Intel Corporation 3 */ 4 #include <stdlib.h> 5 #include <netinet/in.h> 6 7 #include <rte_mbuf.h> 8 #include <rte_malloc.h> 9 #include <rte_ethdev_driver.h> 10 #include <rte_ethdev_vdev.h> 11 #include <rte_tcp.h> 12 #include <rte_udp.h> 13 #include <rte_ip.h> 14 #include <rte_ip_frag.h> 15 #include <rte_devargs.h> 16 #include <rte_kvargs.h> 17 #include <rte_bus_vdev.h> 18 #include <rte_alarm.h> 19 #include <rte_cycles.h> 20 #include <rte_string_fns.h> 21 22 #include "rte_eth_bond.h" 23 #include "rte_eth_bond_private.h" 24 #include "rte_eth_bond_8023ad_private.h" 25 26 #define REORDER_PERIOD_MS 10 27 #define DEFAULT_POLLING_INTERVAL_10_MS (10) 28 29 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port) 30 31 /* Table for statistics in mode 5 TLB */ 32 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS]; 33 34 static inline size_t 35 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto) 36 { 37 size_t vlan_offset = 0; 38 39 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) { 40 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); 41 42 vlan_offset = sizeof(struct vlan_hdr); 43 *proto = vlan_hdr->eth_proto; 44 45 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) { 46 vlan_hdr = vlan_hdr + 1; 47 *proto = vlan_hdr->eth_proto; 48 vlan_offset += sizeof(struct vlan_hdr); 49 } 50 } 51 return vlan_offset; 52 } 53 54 static uint16_t 55 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 56 { 57 struct bond_dev_private *internals; 58 59 uint16_t num_rx_slave = 0; 60 uint16_t num_rx_total = 0; 61 62 int i; 63 64 /* Cast to structure, containing bonded device's port id and queue id */ 65 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; 66 67 internals = bd_rx_q->dev_private; 68 69 70 for (i = 0; i < internals->active_slave_count && nb_pkts; i++) { 71 /* Offset of pointer to *bufs increases as packets are received 72 * from other slaves */ 73 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i], 74 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts); 75 if (num_rx_slave) { 76 num_rx_total += num_rx_slave; 77 nb_pkts -= num_rx_slave; 78 } 79 } 80 81 return num_rx_total; 82 } 83 84 static uint16_t 85 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs, 86 uint16_t nb_pkts) 87 { 88 struct bond_dev_private *internals; 89 90 /* Cast to structure, containing bonded device's port id and queue id */ 91 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; 92 93 internals = bd_rx_q->dev_private; 94 95 return rte_eth_rx_burst(internals->current_primary_port, 96 bd_rx_q->queue_id, bufs, nb_pkts); 97 } 98 99 static inline uint8_t 100 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf) 101 { 102 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW); 103 104 return !((mbuf->ol_flags & PKT_RX_VLAN) ? 
mbuf->vlan_tci : 0) && 105 (ethertype == ether_type_slow_be && 106 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP)); 107 } 108 109 /***************************************************************************** 110 * Flow director's setup for mode 4 optimization 111 */ 112 113 static struct rte_flow_item_eth flow_item_eth_type_8023ad = { 114 .dst.addr_bytes = { 0 }, 115 .src.addr_bytes = { 0 }, 116 .type = RTE_BE16(ETHER_TYPE_SLOW), 117 }; 118 119 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = { 120 .dst.addr_bytes = { 0 }, 121 .src.addr_bytes = { 0 }, 122 .type = 0xFFFF, 123 }; 124 125 static struct rte_flow_item flow_item_8023ad[] = { 126 { 127 .type = RTE_FLOW_ITEM_TYPE_ETH, 128 .spec = &flow_item_eth_type_8023ad, 129 .last = NULL, 130 .mask = &flow_item_eth_mask_type_8023ad, 131 }, 132 { 133 .type = RTE_FLOW_ITEM_TYPE_END, 134 .spec = NULL, 135 .last = NULL, 136 .mask = NULL, 137 } 138 }; 139 140 const struct rte_flow_attr flow_attr_8023ad = { 141 .group = 0, 142 .priority = 0, 143 .ingress = 1, 144 .egress = 0, 145 .reserved = 0, 146 }; 147 148 int 149 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev, 150 uint16_t slave_port) { 151 struct rte_eth_dev_info slave_info; 152 struct rte_flow_error error; 153 struct bond_dev_private *internals = (struct bond_dev_private *) 154 (bond_dev->data->dev_private); 155 156 const struct rte_flow_action_queue lacp_queue_conf = { 157 .index = 0, 158 }; 159 160 const struct rte_flow_action actions[] = { 161 { 162 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 163 .conf = &lacp_queue_conf 164 }, 165 { 166 .type = RTE_FLOW_ACTION_TYPE_END, 167 } 168 }; 169 170 int ret = rte_flow_validate(slave_port, &flow_attr_8023ad, 171 flow_item_8023ad, actions, &error); 172 if (ret < 0) { 173 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)", 174 __func__, error.message, slave_port, 175 internals->mode4.dedicated_queues.rx_qid); 176 return -1; 177 } 178 179 rte_eth_dev_info_get(slave_port, &slave_info); 180 if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues || 181 slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) { 182 RTE_BOND_LOG(ERR, 183 "%s: Slave %d capabilities doesn't allow to allocate additional queues", 184 __func__, slave_port); 185 return -1; 186 } 187 188 return 0; 189 } 190 191 int 192 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) { 193 struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id]; 194 struct bond_dev_private *internals = (struct bond_dev_private *) 195 (bond_dev->data->dev_private); 196 struct rte_eth_dev_info bond_info; 197 uint16_t idx; 198 199 /* Verify if all slaves in bonding supports flow director and */ 200 if (internals->slave_count > 0) { 201 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info); 202 203 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues; 204 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues; 205 206 for (idx = 0; idx < internals->slave_count; idx++) { 207 if (bond_ethdev_8023ad_flow_verify(bond_dev, 208 internals->slaves[idx].port_id) != 0) 209 return -1; 210 } 211 } 212 213 return 0; 214 } 215 216 int 217 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) { 218 219 struct rte_flow_error error; 220 struct bond_dev_private *internals = (struct bond_dev_private *) 221 (bond_dev->data->dev_private); 222 223 struct rte_flow_action_queue lacp_queue_conf = { 224 .index = internals->mode4.dedicated_queues.rx_qid, 225 }; 226 227 const struct rte_flow_action actions[] = { 228 { 229 .type = 
RTE_FLOW_ACTION_TYPE_QUEUE, 230 .conf = &lacp_queue_conf 231 }, 232 { 233 .type = RTE_FLOW_ACTION_TYPE_END, 234 } 235 }; 236 237 internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port, 238 &flow_attr_8023ad, flow_item_8023ad, actions, &error); 239 if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) { 240 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s " 241 "(slave_port=%d queue_id=%d)", 242 error.message, slave_port, 243 internals->mode4.dedicated_queues.rx_qid); 244 return -1; 245 } 246 247 return 0; 248 } 249 250 static uint16_t 251 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, 252 uint16_t nb_pkts) 253 { 254 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; 255 struct bond_dev_private *internals = bd_rx_q->dev_private; 256 uint16_t num_rx_total = 0; /* Total number of received packets */ 257 uint16_t slaves[RTE_MAX_ETHPORTS]; 258 uint16_t slave_count; 259 260 uint16_t i, idx; 261 262 /* Copy slave list to protect against slave up/down changes during tx 263 * bursting */ 264 slave_count = internals->active_slave_count; 265 memcpy(slaves, internals->active_slaves, 266 sizeof(internals->active_slaves[0]) * slave_count); 267 268 for (i = 0, idx = internals->active_slave; 269 i < slave_count && num_rx_total < nb_pkts; i++, idx++) { 270 idx = idx % slave_count; 271 272 /* Read packets from this slave */ 273 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id, 274 &bufs[num_rx_total], nb_pkts - num_rx_total); 275 } 276 277 internals->active_slave = idx; 278 279 return num_rx_total; 280 } 281 282 static uint16_t 283 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, 284 uint16_t nb_bufs) 285 { 286 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 287 struct bond_dev_private *internals = bd_tx_q->dev_private; 288 289 uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; 290 uint16_t slave_count; 291 292 uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS]; 293 uint16_t dist_slave_count; 294 295 /* 2-D array to sort mbufs for transmission on each slave into */ 296 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; 297 /* Number of mbufs for transmission on each slave */ 298 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; 299 /* Mapping array generated by hash function to map mbufs to slaves */ 300 uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 }; 301 302 uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 }; 303 uint16_t total_tx_count = 0, total_tx_fail_count = 0; 304 305 uint16_t i, j; 306 307 if (unlikely(nb_bufs == 0)) 308 return 0; 309 310 /* Copy slave list to protect against slave up/down changes during tx 311 * bursting */ 312 slave_count = internals->active_slave_count; 313 if (unlikely(slave_count < 1)) 314 return 0; 315 316 memcpy(slave_port_ids, internals->active_slaves, 317 sizeof(slave_port_ids[0]) * slave_count); 318 319 320 dist_slave_count = 0; 321 for (i = 0; i < slave_count; i++) { 322 struct port *port = &mode_8023ad_ports[slave_port_ids[i]]; 323 324 if (ACTOR_STATE(port, DISTRIBUTING)) 325 dist_slave_port_ids[dist_slave_count++] = 326 slave_port_ids[i]; 327 } 328 329 if (unlikely(dist_slave_count < 1)) 330 return 0; 331 332 /* 333 * Populate slaves mbuf with the packets which are to be sent on it 334 * selecting output slave using hash based on xmit policy 335 */ 336 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count, 337 bufs_slave_port_idxs); 338 339 for (i = 0; i < nb_bufs; i++) { 340 /* Populate slave mbuf arrays with 
mbufs for that slave. */ 341 uint8_t slave_idx = bufs_slave_port_idxs[i]; 342 343 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; 344 } 345 346 347 /* Send packet burst on each slave device */ 348 for (i = 0; i < dist_slave_count; i++) { 349 if (slave_nb_bufs[i] == 0) 350 continue; 351 352 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i], 353 bd_tx_q->queue_id, slave_bufs[i], 354 slave_nb_bufs[i]); 355 356 total_tx_count += slave_tx_count; 357 358 /* If tx burst fails move packets to end of bufs */ 359 if (unlikely(slave_tx_count < slave_nb_bufs[i])) { 360 slave_tx_fail_count[i] = slave_nb_bufs[i] - 361 slave_tx_count; 362 total_tx_fail_count += slave_tx_fail_count[i]; 363 364 /* 365 * Shift bufs to beginning of array to allow reordering 366 * later 367 */ 368 for (j = 0; j < slave_tx_fail_count[i]; j++) { 369 slave_bufs[i][j] = 370 slave_bufs[i][(slave_tx_count - 1) + j]; 371 } 372 } 373 } 374 375 /* 376 * If there are tx burst failures we move packets to end of bufs to 377 * preserve expected PMD behaviour of all failed transmitted being 378 * at the end of the input mbuf array 379 */ 380 if (unlikely(total_tx_fail_count > 0)) { 381 int bufs_idx = nb_bufs - total_tx_fail_count - 1; 382 383 for (i = 0; i < slave_count; i++) { 384 if (slave_tx_fail_count[i] > 0) { 385 for (j = 0; j < slave_tx_fail_count[i]; j++) 386 bufs[bufs_idx++] = slave_bufs[i][j]; 387 } 388 } 389 } 390 391 return total_tx_count; 392 } 393 394 395 static uint16_t 396 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, 397 uint16_t nb_pkts) 398 { 399 /* Cast to structure, containing bonded device's port id and queue id */ 400 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; 401 struct bond_dev_private *internals = bd_rx_q->dev_private; 402 struct ether_addr bond_mac; 403 404 struct ether_hdr *hdr; 405 406 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW); 407 uint16_t num_rx_total = 0; /* Total number of received packets */ 408 uint16_t slaves[RTE_MAX_ETHPORTS]; 409 uint16_t slave_count, idx; 410 411 uint8_t collecting; /* current slave collecting status */ 412 const uint8_t promisc = internals->promiscuous_en; 413 uint8_t i, j, k; 414 uint8_t subtype; 415 416 rte_eth_macaddr_get(internals->port_id, &bond_mac); 417 /* Copy slave list to protect against slave up/down changes during tx 418 * bursting */ 419 slave_count = internals->active_slave_count; 420 memcpy(slaves, internals->active_slaves, 421 sizeof(internals->active_slaves[0]) * slave_count); 422 423 idx = internals->active_slave; 424 if (idx >= slave_count) { 425 internals->active_slave = 0; 426 idx = 0; 427 } 428 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) { 429 j = num_rx_total; 430 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]], 431 COLLECTING); 432 433 /* Read packets from this slave */ 434 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id, 435 &bufs[num_rx_total], nb_pkts - num_rx_total); 436 437 for (k = j; k < 2 && k < num_rx_total; k++) 438 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *)); 439 440 /* Handle slow protocol packets. 
*/ 441 while (j < num_rx_total) { 442 443 /* If packet is not pure L2 and is known, skip it */ 444 if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) { 445 j++; 446 continue; 447 } 448 449 if (j + 3 < num_rx_total) 450 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); 451 452 hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *); 453 subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype; 454 455 /* Remove packet from array if it is slow packet or slave is not 456 * in collecting state or bonding interface is not in promiscuous 457 * mode and packet address does not match. */ 458 if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) || 459 !collecting || (!promisc && 460 !is_multicast_ether_addr(&hdr->d_addr) && 461 !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) { 462 463 if (hdr->ether_type == ether_type_slow_be) { 464 bond_mode_8023ad_handle_slow_pkt( 465 internals, slaves[idx], bufs[j]); 466 } else 467 rte_pktmbuf_free(bufs[j]); 468 469 /* Packet is managed by mode 4 or dropped, shift the array */ 470 num_rx_total--; 471 if (j < num_rx_total) { 472 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) * 473 (num_rx_total - j)); 474 } 475 } else 476 j++; 477 } 478 if (unlikely(++idx == slave_count)) 479 idx = 0; 480 } 481 482 internals->active_slave = idx; 483 return num_rx_total; 484 } 485 486 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) 487 uint32_t burstnumberRX; 488 uint32_t burstnumberTX; 489 490 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB 491 492 static void 493 arp_op_name(uint16_t arp_op, char *buf) 494 { 495 switch (arp_op) { 496 case ARP_OP_REQUEST: 497 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request"); 498 return; 499 case ARP_OP_REPLY: 500 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply"); 501 return; 502 case ARP_OP_REVREQUEST: 503 snprintf(buf, sizeof("Reverse ARP Request"), "%s", 504 "Reverse ARP Request"); 505 return; 506 case ARP_OP_REVREPLY: 507 snprintf(buf, sizeof("Reverse ARP Reply"), "%s", 508 "Reverse ARP Reply"); 509 return; 510 case ARP_OP_INVREQUEST: 511 snprintf(buf, sizeof("Peer Identify Request"), "%s", 512 "Peer Identify Request"); 513 return; 514 case ARP_OP_INVREPLY: 515 snprintf(buf, sizeof("Peer Identify Reply"), "%s", 516 "Peer Identify Reply"); 517 return; 518 default: 519 break; 520 } 521 snprintf(buf, sizeof("Unknown"), "%s", "Unknown"); 522 return; 523 } 524 #endif 525 #define MaxIPv4String 16 526 static void 527 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size) 528 { 529 uint32_t ipv4_addr; 530 531 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr); 532 snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF, 533 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF, 534 ipv4_addr & 0xFF); 535 } 536 537 #define MAX_CLIENTS_NUMBER 128 538 uint8_t active_clients; 539 struct client_stats_t { 540 uint16_t port; 541 uint32_t ipv4_addr; 542 uint32_t ipv4_rx_packets; 543 uint32_t ipv4_tx_packets; 544 }; 545 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER]; 546 547 static void 548 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator) 549 { 550 int i = 0; 551 552 for (; i < MAX_CLIENTS_NUMBER; i++) { 553 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) { 554 /* Just update RX packets number for this client */ 555 if (TXorRXindicator == &burstnumberRX) 556 client_stats[i].ipv4_rx_packets++; 557 else 558 client_stats[i].ipv4_tx_packets++; 559 return; 560 } 561 } 562 /* We have a new client. 
Insert him to the table, and increment stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;

}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	RTE_LOG(DEBUG, PMD, \
		"%s " \
		"port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, \
		++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) ||
defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) 669 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) 670 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX); 671 #endif 672 } 673 674 return nb_recv_pkts; 675 } 676 677 static uint16_t 678 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs, 679 uint16_t nb_pkts) 680 { 681 struct bond_dev_private *internals; 682 struct bond_tx_queue *bd_tx_q; 683 684 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; 685 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; 686 687 uint16_t num_of_slaves; 688 uint16_t slaves[RTE_MAX_ETHPORTS]; 689 690 uint16_t num_tx_total = 0, num_tx_slave; 691 692 static int slave_idx = 0; 693 int i, cslave_idx = 0, tx_fail_total = 0; 694 695 bd_tx_q = (struct bond_tx_queue *)queue; 696 internals = bd_tx_q->dev_private; 697 698 /* Copy slave list to protect against slave up/down changes during tx 699 * bursting */ 700 num_of_slaves = internals->active_slave_count; 701 memcpy(slaves, internals->active_slaves, 702 sizeof(internals->active_slaves[0]) * num_of_slaves); 703 704 if (num_of_slaves < 1) 705 return num_tx_total; 706 707 /* Populate slaves mbuf with which packets are to be sent on it */ 708 for (i = 0; i < nb_pkts; i++) { 709 cslave_idx = (slave_idx + i) % num_of_slaves; 710 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i]; 711 } 712 713 /* increment current slave index so the next call to tx burst starts on the 714 * next slave */ 715 slave_idx = ++cslave_idx; 716 717 /* Send packet burst on each slave device */ 718 for (i = 0; i < num_of_slaves; i++) { 719 if (slave_nb_pkts[i] > 0) { 720 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, 721 slave_bufs[i], slave_nb_pkts[i]); 722 723 /* if tx burst fails move packets to end of bufs */ 724 if (unlikely(num_tx_slave < slave_nb_pkts[i])) { 725 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave; 726 727 tx_fail_total += tx_fail_slave; 728 729 memcpy(&bufs[nb_pkts - tx_fail_total], 730 &slave_bufs[i][num_tx_slave], 731 tx_fail_slave * sizeof(bufs[0])); 732 } 733 num_tx_total += num_tx_slave; 734 } 735 } 736 737 return num_tx_total; 738 } 739 740 static uint16_t 741 bond_ethdev_tx_burst_active_backup(void *queue, 742 struct rte_mbuf **bufs, uint16_t nb_pkts) 743 { 744 struct bond_dev_private *internals; 745 struct bond_tx_queue *bd_tx_q; 746 747 bd_tx_q = (struct bond_tx_queue *)queue; 748 internals = bd_tx_q->dev_private; 749 750 if (internals->active_slave_count < 1) 751 return 0; 752 753 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id, 754 bufs, nb_pkts); 755 } 756 757 static inline uint16_t 758 ether_hash(struct ether_hdr *eth_hdr) 759 { 760 unaligned_uint16_t *word_src_addr = 761 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes; 762 unaligned_uint16_t *word_dst_addr = 763 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes; 764 765 return (word_src_addr[0] ^ word_dst_addr[0]) ^ 766 (word_src_addr[1] ^ word_dst_addr[1]) ^ 767 (word_src_addr[2] ^ word_dst_addr[2]); 768 } 769 770 static inline uint32_t 771 ipv4_hash(struct ipv4_hdr *ipv4_hdr) 772 { 773 return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr; 774 } 775 776 static inline uint32_t 777 ipv6_hash(struct ipv6_hdr *ipv6_hdr) 778 { 779 unaligned_uint32_t *word_src_addr = 780 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]); 781 unaligned_uint32_t *word_dst_addr = 782 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]); 783 784 return (word_src_addr[0] ^ word_dst_addr[0]) ^ 785 (word_src_addr[1] ^ word_dst_addr[1]) ^ 786 (word_src_addr[2] ^ 
word_dst_addr[2]) ^ 787 (word_src_addr[3] ^ word_dst_addr[3]); 788 } 789 790 791 void 792 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts, 793 uint8_t slave_count, uint16_t *slaves) 794 { 795 struct ether_hdr *eth_hdr; 796 uint32_t hash; 797 int i; 798 799 for (i = 0; i < nb_pkts; i++) { 800 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *); 801 802 hash = ether_hash(eth_hdr); 803 804 slaves[i] = (hash ^= hash >> 8) % slave_count; 805 } 806 } 807 808 void 809 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts, 810 uint8_t slave_count, uint16_t *slaves) 811 { 812 uint16_t i; 813 struct ether_hdr *eth_hdr; 814 uint16_t proto; 815 size_t vlan_offset; 816 uint32_t hash, l3hash; 817 818 for (i = 0; i < nb_pkts; i++) { 819 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *); 820 l3hash = 0; 821 822 proto = eth_hdr->ether_type; 823 hash = ether_hash(eth_hdr); 824 825 vlan_offset = get_vlan_offset(eth_hdr, &proto); 826 827 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { 828 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) 829 ((char *)(eth_hdr + 1) + vlan_offset); 830 l3hash = ipv4_hash(ipv4_hdr); 831 832 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { 833 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) 834 ((char *)(eth_hdr + 1) + vlan_offset); 835 l3hash = ipv6_hash(ipv6_hdr); 836 } 837 838 hash = hash ^ l3hash; 839 hash ^= hash >> 16; 840 hash ^= hash >> 8; 841 842 slaves[i] = hash % slave_count; 843 } 844 } 845 846 void 847 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts, 848 uint8_t slave_count, uint16_t *slaves) 849 { 850 struct ether_hdr *eth_hdr; 851 uint16_t proto; 852 size_t vlan_offset; 853 int i; 854 855 struct udp_hdr *udp_hdr; 856 struct tcp_hdr *tcp_hdr; 857 uint32_t hash, l3hash, l4hash; 858 859 for (i = 0; i < nb_pkts; i++) { 860 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *); 861 proto = eth_hdr->ether_type; 862 vlan_offset = get_vlan_offset(eth_hdr, &proto); 863 l3hash = 0; 864 l4hash = 0; 865 866 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { 867 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) 868 ((char *)(eth_hdr + 1) + vlan_offset); 869 size_t ip_hdr_offset; 870 871 l3hash = ipv4_hash(ipv4_hdr); 872 873 /* there is no L4 header in fragmented packet */ 874 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) 875 == 0)) { 876 ip_hdr_offset = (ipv4_hdr->version_ihl 877 & IPV4_HDR_IHL_MASK) * 878 IPV4_IHL_MULTIPLIER; 879 880 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) { 881 tcp_hdr = (struct tcp_hdr *) 882 ((char *)ipv4_hdr + 883 ip_hdr_offset); 884 l4hash = HASH_L4_PORTS(tcp_hdr); 885 } else if (ipv4_hdr->next_proto_id == 886 IPPROTO_UDP) { 887 udp_hdr = (struct udp_hdr *) 888 ((char *)ipv4_hdr + 889 ip_hdr_offset); 890 l4hash = HASH_L4_PORTS(udp_hdr); 891 } 892 } 893 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { 894 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) 895 ((char *)(eth_hdr + 1) + vlan_offset); 896 l3hash = ipv6_hash(ipv6_hdr); 897 898 if (ipv6_hdr->proto == IPPROTO_TCP) { 899 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1); 900 l4hash = HASH_L4_PORTS(tcp_hdr); 901 } else if (ipv6_hdr->proto == IPPROTO_UDP) { 902 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1); 903 l4hash = HASH_L4_PORTS(udp_hdr); 904 } 905 } 906 907 hash = l3hash ^ l4hash; 908 hash ^= hash >> 16; 909 hash ^= hash >> 8; 910 911 slaves[i] = hash % slave_count; 912 } 913 } 914 915 struct bwg_slave { 916 uint64_t bwg_left_int; 917 uint64_t bwg_left_remainder; 918 uint8_t slave; 919 }; 920 921 void 922 bond_tlb_activate_slave(struct 
bond_dev_private *internals) { 923 int i; 924 925 for (i = 0; i < internals->active_slave_count; i++) { 926 tlb_last_obytets[internals->active_slaves[i]] = 0; 927 } 928 } 929 930 static int 931 bandwidth_cmp(const void *a, const void *b) 932 { 933 const struct bwg_slave *bwg_a = a; 934 const struct bwg_slave *bwg_b = b; 935 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int; 936 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder - 937 (int64_t)bwg_a->bwg_left_remainder; 938 if (diff > 0) 939 return 1; 940 else if (diff < 0) 941 return -1; 942 else if (diff2 > 0) 943 return 1; 944 else if (diff2 < 0) 945 return -1; 946 else 947 return 0; 948 } 949 950 static void 951 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx, 952 struct bwg_slave *bwg_slave) 953 { 954 struct rte_eth_link link_status; 955 956 rte_eth_link_get_nowait(port_id, &link_status); 957 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8; 958 if (link_bwg == 0) 959 return; 960 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS; 961 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg; 962 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg; 963 } 964 965 static void 966 bond_ethdev_update_tlb_slave_cb(void *arg) 967 { 968 struct bond_dev_private *internals = arg; 969 struct rte_eth_stats slave_stats; 970 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS]; 971 uint8_t slave_count; 972 uint64_t tx_bytes; 973 974 uint8_t update_stats = 0; 975 uint8_t i, slave_id; 976 977 internals->slave_update_idx++; 978 979 980 if (internals->slave_update_idx >= REORDER_PERIOD_MS) 981 update_stats = 1; 982 983 for (i = 0; i < internals->active_slave_count; i++) { 984 slave_id = internals->active_slaves[i]; 985 rte_eth_stats_get(slave_id, &slave_stats); 986 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id]; 987 bandwidth_left(slave_id, tx_bytes, 988 internals->slave_update_idx, &bwg_array[i]); 989 bwg_array[i].slave = slave_id; 990 991 if (update_stats) { 992 tlb_last_obytets[slave_id] = slave_stats.obytes; 993 } 994 } 995 996 if (update_stats == 1) 997 internals->slave_update_idx = 0; 998 999 slave_count = i; 1000 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp); 1001 for (i = 0; i < slave_count; i++) 1002 internals->tlb_slaves_order[i] = bwg_array[i].slave; 1003 1004 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb, 1005 (struct bond_dev_private *)internals); 1006 } 1007 1008 static uint16_t 1009 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 1010 { 1011 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 1012 struct bond_dev_private *internals = bd_tx_q->dev_private; 1013 1014 struct rte_eth_dev *primary_port = 1015 &rte_eth_devices[internals->primary_port]; 1016 uint16_t num_tx_total = 0; 1017 uint16_t i, j; 1018 1019 uint16_t num_of_slaves = internals->active_slave_count; 1020 uint16_t slaves[RTE_MAX_ETHPORTS]; 1021 1022 struct ether_hdr *ether_hdr; 1023 struct ether_addr primary_slave_addr; 1024 struct ether_addr active_slave_addr; 1025 1026 if (num_of_slaves < 1) 1027 return num_tx_total; 1028 1029 memcpy(slaves, internals->tlb_slaves_order, 1030 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves); 1031 1032 1033 ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr); 1034 1035 if (nb_pkts > 3) { 1036 for (i = 0; i < 3; i++) 1037 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*)); 1038 } 1039 1040 for (i = 0; i < num_of_slaves; i++) { 1041 
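		/*
		 * Packets that still carry the bonded device's (primary)
		 * source MAC are rewritten below to the current slave's own
		 * MAC before transmission, so the peer switch does not see a
		 * single source MAC flapping between slave ports.
		 */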
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}
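	/*
	 * Update packets that could not be sent were allocated by the driver
	 * itself, so they are freed above rather than returned to the caller,
	 * and they are never counted in num_tx_total.
	 */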
1200 1201 /* Send non-ARP packets using tlb policy */ 1202 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) { 1203 num_send = bond_ethdev_tx_burst_tlb(queue, 1204 slave_bufs[RTE_MAX_ETHPORTS], 1205 slave_bufs_pkts[RTE_MAX_ETHPORTS]); 1206 1207 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) { 1208 bufs[nb_pkts - 1 - num_not_send - j] = 1209 slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j]; 1210 } 1211 1212 num_tx_total += num_send; 1213 } 1214 1215 return num_tx_total; 1216 } 1217 1218 static uint16_t 1219 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, 1220 uint16_t nb_bufs) 1221 { 1222 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 1223 struct bond_dev_private *internals = bd_tx_q->dev_private; 1224 1225 uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; 1226 uint16_t slave_count; 1227 1228 /* Array to sort mbufs for transmission on each slave into */ 1229 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; 1230 /* Number of mbufs for transmission on each slave */ 1231 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; 1232 /* Mapping array generated by hash function to map mbufs to slaves */ 1233 uint16_t bufs_slave_port_idxs[nb_bufs]; 1234 1235 uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 }; 1236 uint16_t total_tx_count = 0, total_tx_fail_count = 0; 1237 1238 uint16_t i, j; 1239 1240 if (unlikely(nb_bufs == 0)) 1241 return 0; 1242 1243 /* Copy slave list to protect against slave up/down changes during tx 1244 * bursting */ 1245 slave_count = internals->active_slave_count; 1246 if (unlikely(slave_count < 1)) 1247 return 0; 1248 1249 memcpy(slave_port_ids, internals->active_slaves, 1250 sizeof(slave_port_ids[0]) * slave_count); 1251 1252 /* 1253 * Populate slaves mbuf with the packets which are to be sent on it 1254 * selecting output slave using hash based on xmit policy 1255 */ 1256 internals->burst_xmit_hash(bufs, nb_bufs, slave_count, 1257 bufs_slave_port_idxs); 1258 1259 for (i = 0; i < nb_bufs; i++) { 1260 /* Populate slave mbuf arrays with mbufs for that slave. 
*/ 1261 uint8_t slave_idx = bufs_slave_port_idxs[i]; 1262 1263 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; 1264 } 1265 1266 /* Send packet burst on each slave device */ 1267 for (i = 0; i < slave_count; i++) { 1268 if (slave_nb_bufs[i] == 0) 1269 continue; 1270 1271 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], 1272 bd_tx_q->queue_id, slave_bufs[i], 1273 slave_nb_bufs[i]); 1274 1275 total_tx_count += slave_tx_count; 1276 1277 /* If tx burst fails move packets to end of bufs */ 1278 if (unlikely(slave_tx_count < slave_nb_bufs[i])) { 1279 slave_tx_fail_count[i] = slave_nb_bufs[i] - 1280 slave_tx_count; 1281 total_tx_fail_count += slave_tx_fail_count[i]; 1282 1283 /* 1284 * Shift bufs to beginning of array to allow reordering 1285 * later 1286 */ 1287 for (j = 0; j < slave_tx_fail_count[i]; j++) { 1288 slave_bufs[i][j] = 1289 slave_bufs[i][(slave_tx_count - 1) + j]; 1290 } 1291 } 1292 } 1293 1294 /* 1295 * If there are tx burst failures we move packets to end of bufs to 1296 * preserve expected PMD behaviour of all failed transmitted being 1297 * at the end of the input mbuf array 1298 */ 1299 if (unlikely(total_tx_fail_count > 0)) { 1300 int bufs_idx = nb_bufs - total_tx_fail_count - 1; 1301 1302 for (i = 0; i < slave_count; i++) { 1303 if (slave_tx_fail_count[i] > 0) { 1304 for (j = 0; j < slave_tx_fail_count[i]; j++) 1305 bufs[bufs_idx++] = slave_bufs[i][j]; 1306 } 1307 } 1308 } 1309 1310 return total_tx_count; 1311 } 1312 1313 static uint16_t 1314 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, 1315 uint16_t nb_bufs) 1316 { 1317 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; 1318 struct bond_dev_private *internals = bd_tx_q->dev_private; 1319 1320 uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; 1321 uint16_t slave_count; 1322 1323 uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS]; 1324 uint16_t dist_slave_count; 1325 1326 /* 2-D array to sort mbufs for transmission on each slave into */ 1327 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; 1328 /* Number of mbufs for transmission on each slave */ 1329 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; 1330 /* Mapping array generated by hash function to map mbufs to slaves */ 1331 uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 }; 1332 1333 uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 }; 1334 uint16_t total_tx_count = 0, total_tx_fail_count = 0; 1335 1336 uint16_t i, j; 1337 1338 if (unlikely(nb_bufs == 0)) 1339 return 0; 1340 1341 /* Copy slave list to protect against slave up/down changes during tx 1342 * bursting */ 1343 slave_count = internals->active_slave_count; 1344 if (unlikely(slave_count < 1)) 1345 return 0; 1346 1347 memcpy(slave_port_ids, internals->active_slaves, 1348 sizeof(slave_port_ids[0]) * slave_count); 1349 1350 dist_slave_count = 0; 1351 for (i = 0; i < slave_count; i++) { 1352 struct port *port = &mode_8023ad_ports[slave_port_ids[i]]; 1353 1354 if (ACTOR_STATE(port, DISTRIBUTING)) 1355 dist_slave_port_ids[dist_slave_count++] = 1356 slave_port_ids[i]; 1357 } 1358 1359 if (likely(dist_slave_count > 1)) { 1360 1361 /* 1362 * Populate slaves mbuf with the packets which are to be sent 1363 * on it, selecting output slave using hash based on xmit policy 1364 */ 1365 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count, 1366 bufs_slave_port_idxs); 1367 1368 for (i = 0; i < nb_bufs; i++) { 1369 /* 1370 * Populate slave mbuf arrays with mbufs for that 1371 * slave 1372 */ 1373 uint8_t slave_idx = bufs_slave_port_idxs[i]; 1374 1375 
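			/*
			 * slave_idx is an index into dist_slave_port_ids (a
			 * slave in DISTRIBUTING state) produced by
			 * burst_xmit_hash() above, not an ethdev port id.
			 */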
slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = 1376 bufs[i]; 1377 } 1378 1379 1380 /* Send packet burst on each slave device */ 1381 for (i = 0; i < dist_slave_count; i++) { 1382 if (slave_nb_bufs[i] == 0) 1383 continue; 1384 1385 slave_tx_count = rte_eth_tx_burst( 1386 dist_slave_port_ids[i], 1387 bd_tx_q->queue_id, slave_bufs[i], 1388 slave_nb_bufs[i]); 1389 1390 total_tx_count += slave_tx_count; 1391 1392 /* If tx burst fails move packets to end of bufs */ 1393 if (unlikely(slave_tx_count < slave_nb_bufs[i])) { 1394 slave_tx_fail_count[i] = slave_nb_bufs[i] - 1395 slave_tx_count; 1396 total_tx_fail_count += slave_tx_fail_count[i]; 1397 1398 /* 1399 * Shift bufs to beginning of array to allow 1400 * reordering later 1401 */ 1402 for (j = 0; j < slave_tx_fail_count[i]; j++) 1403 slave_bufs[i][j] = 1404 slave_bufs[i] 1405 [(slave_tx_count - 1) 1406 + j]; 1407 } 1408 } 1409 1410 /* 1411 * If there are tx burst failures we move packets to end of 1412 * bufs to preserve expected PMD behaviour of all failed 1413 * transmitted being at the end of the input mbuf array 1414 */ 1415 if (unlikely(total_tx_fail_count > 0)) { 1416 int bufs_idx = nb_bufs - total_tx_fail_count - 1; 1417 1418 for (i = 0; i < slave_count; i++) { 1419 if (slave_tx_fail_count[i] > 0) { 1420 for (j = 0; 1421 j < slave_tx_fail_count[i]; 1422 j++) { 1423 bufs[bufs_idx++] = 1424 slave_bufs[i][j]; 1425 } 1426 } 1427 } 1428 } 1429 } 1430 1431 /* Check for LACP control packets and send if available */ 1432 for (i = 0; i < slave_count; i++) { 1433 struct port *port = &mode_8023ad_ports[slave_port_ids[i]]; 1434 struct rte_mbuf *ctrl_pkt = NULL; 1435 1436 if (likely(rte_ring_empty(port->tx_ring))) 1437 continue; 1438 1439 if (rte_ring_dequeue(port->tx_ring, 1440 (void **)&ctrl_pkt) != -ENOENT) { 1441 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], 1442 bd_tx_q->queue_id, &ctrl_pkt, 1); 1443 /* 1444 * re-enqueue LAG control plane packets to buffering 1445 * ring if transmission fails so the packet isn't lost. 
1446 */ 1447 if (slave_tx_count != 1) 1448 rte_ring_enqueue(port->tx_ring, ctrl_pkt); 1449 } 1450 } 1451 1452 return total_tx_count; 1453 } 1454 1455 static uint16_t 1456 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs, 1457 uint16_t nb_pkts) 1458 { 1459 struct bond_dev_private *internals; 1460 struct bond_tx_queue *bd_tx_q; 1461 1462 uint8_t tx_failed_flag = 0, num_of_slaves; 1463 uint16_t slaves[RTE_MAX_ETHPORTS]; 1464 1465 uint16_t max_nb_of_tx_pkts = 0; 1466 1467 int slave_tx_total[RTE_MAX_ETHPORTS]; 1468 int i, most_successful_tx_slave = -1; 1469 1470 bd_tx_q = (struct bond_tx_queue *)queue; 1471 internals = bd_tx_q->dev_private; 1472 1473 /* Copy slave list to protect against slave up/down changes during tx 1474 * bursting */ 1475 num_of_slaves = internals->active_slave_count; 1476 memcpy(slaves, internals->active_slaves, 1477 sizeof(internals->active_slaves[0]) * num_of_slaves); 1478 1479 if (num_of_slaves < 1) 1480 return 0; 1481 1482 /* Increment reference count on mbufs */ 1483 for (i = 0; i < nb_pkts; i++) 1484 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1); 1485 1486 /* Transmit burst on each active slave */ 1487 for (i = 0; i < num_of_slaves; i++) { 1488 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, 1489 bufs, nb_pkts); 1490 1491 if (unlikely(slave_tx_total[i] < nb_pkts)) 1492 tx_failed_flag = 1; 1493 1494 /* record the value and slave index for the slave which transmits the 1495 * maximum number of packets */ 1496 if (slave_tx_total[i] > max_nb_of_tx_pkts) { 1497 max_nb_of_tx_pkts = slave_tx_total[i]; 1498 most_successful_tx_slave = i; 1499 } 1500 } 1501 1502 /* if slaves fail to transmit packets from burst, the calling application 1503 * is not expected to know about multiple references to packets so we must 1504 * handle failures of all packets except those of the most successful slave 1505 */ 1506 if (unlikely(tx_failed_flag)) 1507 for (i = 0; i < num_of_slaves; i++) 1508 if (i != most_successful_tx_slave) 1509 while (slave_tx_total[i] < nb_pkts) 1510 rte_pktmbuf_free(bufs[slave_tx_total[i]++]); 1511 1512 return max_nb_of_tx_pkts; 1513 } 1514 1515 void 1516 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link) 1517 { 1518 struct bond_dev_private *bond_ctx = ethdev->data->dev_private; 1519 1520 if (bond_ctx->mode == BONDING_MODE_8023AD) { 1521 /** 1522 * If in mode 4 then save the link properties of the first 1523 * slave, all subsequent slaves must match these properties 1524 */ 1525 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link; 1526 1527 bond_link->link_autoneg = slave_link->link_autoneg; 1528 bond_link->link_duplex = slave_link->link_duplex; 1529 bond_link->link_speed = slave_link->link_speed; 1530 } else { 1531 /** 1532 * In any other mode the link properties are set to default 1533 * values of AUTONEG/DUPLEX 1534 */ 1535 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG; 1536 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX; 1537 } 1538 } 1539 1540 int 1541 link_properties_valid(struct rte_eth_dev *ethdev, 1542 struct rte_eth_link *slave_link) 1543 { 1544 struct bond_dev_private *bond_ctx = ethdev->data->dev_private; 1545 1546 if (bond_ctx->mode == BONDING_MODE_8023AD) { 1547 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link; 1548 1549 if (bond_link->link_duplex != slave_link->link_duplex || 1550 bond_link->link_autoneg != slave_link->link_autoneg || 1551 bond_link->link_speed != slave_link->link_speed) 1552 return -1; 1553 } 1554 1555 return 0; 1556 } 1557 
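/*
 * Rough sketch of how the two helpers above are used together (the actual
 * caller is the slave link-status handling in this file; local variable
 * names below are illustrative only): the first mode-4 slave's link is
 * recorded, and later slaves are validated against it.
 *
 *	struct rte_eth_link link;
 *
 *	rte_eth_link_get_nowait(slave_port_id, &link);
 *	if (link_properties_valid(bonded_eth_dev, &link) != 0)
 *		RTE_BOND_LOG(ERR, "slave link properties do not match bond");
 *	else
 *		link_properties_set(bonded_eth_dev, &link);
 */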
1558 int 1559 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr) 1560 { 1561 struct ether_addr *mac_addr; 1562 1563 if (eth_dev == NULL) { 1564 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__); 1565 return -1; 1566 } 1567 1568 if (dst_mac_addr == NULL) { 1569 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__); 1570 return -1; 1571 } 1572 1573 mac_addr = eth_dev->data->mac_addrs; 1574 1575 ether_addr_copy(mac_addr, dst_mac_addr); 1576 return 0; 1577 } 1578 1579 int 1580 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr) 1581 { 1582 struct ether_addr *mac_addr; 1583 1584 if (eth_dev == NULL) { 1585 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); 1586 return -1; 1587 } 1588 1589 if (new_mac_addr == NULL) { 1590 RTE_BOND_LOG(ERR, "NULL pointer MAC specified"); 1591 return -1; 1592 } 1593 1594 mac_addr = eth_dev->data->mac_addrs; 1595 1596 /* If new MAC is different to current MAC then update */ 1597 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0) 1598 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr)); 1599 1600 return 0; 1601 } 1602 1603 int 1604 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) 1605 { 1606 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; 1607 int i; 1608 1609 /* Update slave devices MAC addresses */ 1610 if (internals->slave_count < 1) 1611 return -1; 1612 1613 switch (internals->mode) { 1614 case BONDING_MODE_ROUND_ROBIN: 1615 case BONDING_MODE_BALANCE: 1616 case BONDING_MODE_BROADCAST: 1617 for (i = 0; i < internals->slave_count; i++) { 1618 if (rte_eth_dev_default_mac_addr_set( 1619 internals->slaves[i].port_id, 1620 bonded_eth_dev->data->mac_addrs)) { 1621 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", 1622 internals->slaves[i].port_id); 1623 return -1; 1624 } 1625 } 1626 break; 1627 case BONDING_MODE_8023AD: 1628 bond_mode_8023ad_mac_address_update(bonded_eth_dev); 1629 break; 1630 case BONDING_MODE_ACTIVE_BACKUP: 1631 case BONDING_MODE_TLB: 1632 case BONDING_MODE_ALB: 1633 default: 1634 for (i = 0; i < internals->slave_count; i++) { 1635 if (internals->slaves[i].port_id == 1636 internals->current_primary_port) { 1637 if (rte_eth_dev_default_mac_addr_set( 1638 internals->primary_port, 1639 bonded_eth_dev->data->mac_addrs)) { 1640 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", 1641 internals->current_primary_port); 1642 return -1; 1643 } 1644 } else { 1645 if (rte_eth_dev_default_mac_addr_set( 1646 internals->slaves[i].port_id, 1647 &internals->slaves[i].persisted_mac_addr)) { 1648 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", 1649 internals->slaves[i].port_id); 1650 return -1; 1651 } 1652 } 1653 } 1654 } 1655 1656 return 0; 1657 } 1658 1659 int 1660 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode) 1661 { 1662 struct bond_dev_private *internals; 1663 1664 internals = eth_dev->data->dev_private; 1665 1666 switch (mode) { 1667 case BONDING_MODE_ROUND_ROBIN: 1668 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin; 1669 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; 1670 break; 1671 case BONDING_MODE_ACTIVE_BACKUP: 1672 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup; 1673 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup; 1674 break; 1675 case BONDING_MODE_BALANCE: 1676 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance; 1677 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; 1678 break; 1679 case BONDING_MODE_BROADCAST: 1680 eth_dev->tx_pkt_burst = 
bond_ethdev_tx_burst_broadcast; 1681 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; 1682 break; 1683 case BONDING_MODE_8023AD: 1684 if (bond_mode_8023ad_enable(eth_dev) != 0) 1685 return -1; 1686 1687 if (internals->mode4.dedicated_queues.enabled == 0) { 1688 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad; 1689 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad; 1690 RTE_LOG(WARNING, PMD, 1691 "Using mode 4, it is necessary to do TX burst " 1692 "and RX burst at least every 100ms.\n"); 1693 } else { 1694 /* Use flow director's optimization */ 1695 eth_dev->rx_pkt_burst = 1696 bond_ethdev_rx_burst_8023ad_fast_queue; 1697 eth_dev->tx_pkt_burst = 1698 bond_ethdev_tx_burst_8023ad_fast_queue; 1699 } 1700 break; 1701 case BONDING_MODE_TLB: 1702 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb; 1703 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup; 1704 break; 1705 case BONDING_MODE_ALB: 1706 if (bond_mode_alb_enable(eth_dev) != 0) 1707 return -1; 1708 1709 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb; 1710 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb; 1711 break; 1712 default: 1713 return -1; 1714 } 1715 1716 internals->mode = mode; 1717 1718 return 0; 1719 } 1720 1721 1722 static int 1723 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev, 1724 struct rte_eth_dev *slave_eth_dev) 1725 { 1726 int errval = 0; 1727 struct bond_dev_private *internals = (struct bond_dev_private *) 1728 bonded_eth_dev->data->dev_private; 1729 struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id]; 1730 1731 if (port->slow_pool == NULL) { 1732 char mem_name[256]; 1733 int slave_id = slave_eth_dev->data->port_id; 1734 1735 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool", 1736 slave_id); 1737 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191, 1738 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE, 1739 slave_eth_dev->data->numa_node); 1740 1741 /* Any memory allocation failure in initialization is critical because 1742 * resources can't be free, so reinitialization is impossible. 
*/ 1743 if (port->slow_pool == NULL) { 1744 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n", 1745 slave_id, mem_name, rte_strerror(rte_errno)); 1746 } 1747 } 1748 1749 if (internals->mode4.dedicated_queues.enabled == 1) { 1750 /* Configure slow Rx queue */ 1751 1752 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, 1753 internals->mode4.dedicated_queues.rx_qid, 128, 1754 rte_eth_dev_socket_id(slave_eth_dev->data->port_id), 1755 NULL, port->slow_pool); 1756 if (errval != 0) { 1757 RTE_BOND_LOG(ERR, 1758 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)", 1759 slave_eth_dev->data->port_id, 1760 internals->mode4.dedicated_queues.rx_qid, 1761 errval); 1762 return errval; 1763 } 1764 1765 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, 1766 internals->mode4.dedicated_queues.tx_qid, 512, 1767 rte_eth_dev_socket_id(slave_eth_dev->data->port_id), 1768 NULL); 1769 if (errval != 0) { 1770 RTE_BOND_LOG(ERR, 1771 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", 1772 slave_eth_dev->data->port_id, 1773 internals->mode4.dedicated_queues.tx_qid, 1774 errval); 1775 return errval; 1776 } 1777 } 1778 return 0; 1779 } 1780 1781 int 1782 slave_configure(struct rte_eth_dev *bonded_eth_dev, 1783 struct rte_eth_dev *slave_eth_dev) 1784 { 1785 struct bond_rx_queue *bd_rx_q; 1786 struct bond_tx_queue *bd_tx_q; 1787 uint16_t nb_rx_queues; 1788 uint16_t nb_tx_queues; 1789 1790 int errval; 1791 uint16_t q_id; 1792 struct rte_flow_error flow_error; 1793 1794 struct bond_dev_private *internals = (struct bond_dev_private *) 1795 bonded_eth_dev->data->dev_private; 1796 1797 /* Stop slave */ 1798 rte_eth_dev_stop(slave_eth_dev->data->port_id); 1799 1800 /* Enable interrupts on slave device if supported */ 1801 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) 1802 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1; 1803 1804 /* If RSS is enabled for bonding, try to enable it for slaves */ 1805 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) { 1806 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len 1807 != 0) { 1808 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 1809 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len; 1810 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = 1811 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 1812 } else { 1813 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL; 1814 } 1815 1816 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = 1817 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 1818 slave_eth_dev->data->dev_conf.rxmode.mq_mode = 1819 bonded_eth_dev->data->dev_conf.rxmode.mq_mode; 1820 } 1821 1822 if (bonded_eth_dev->data->dev_conf.rxmode.offloads & 1823 DEV_RX_OFFLOAD_VLAN_FILTER) 1824 slave_eth_dev->data->dev_conf.rxmode.offloads |= 1825 DEV_RX_OFFLOAD_VLAN_FILTER; 1826 else 1827 slave_eth_dev->data->dev_conf.rxmode.offloads &= 1828 ~DEV_RX_OFFLOAD_VLAN_FILTER; 1829 1830 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues; 1831 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues; 1832 1833 if (internals->mode == BONDING_MODE_8023AD) { 1834 if (internals->mode4.dedicated_queues.enabled == 1) { 1835 nb_rx_queues++; 1836 nb_tx_queues++; 1837 } 1838 } 1839 1840 errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id, 1841 bonded_eth_dev->data->mtu); 1842 if (errval != 0 && errval != -ENOTSUP) { 1843 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)", 1844 slave_eth_dev->data->port_id, errval); 1845 
return errval; 1846 } 1847 1848 /* Configure device */ 1849 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id, 1850 nb_rx_queues, nb_tx_queues, 1851 &(slave_eth_dev->data->dev_conf)); 1852 if (errval != 0) { 1853 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)", 1854 slave_eth_dev->data->port_id, errval); 1855 return errval; 1856 } 1857 1858 /* Setup Rx Queues */ 1859 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) { 1860 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id]; 1861 1862 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, 1863 bd_rx_q->nb_rx_desc, 1864 rte_eth_dev_socket_id(slave_eth_dev->data->port_id), 1865 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool); 1866 if (errval != 0) { 1867 RTE_BOND_LOG(ERR, 1868 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)", 1869 slave_eth_dev->data->port_id, q_id, errval); 1870 return errval; 1871 } 1872 } 1873 1874 /* Setup Tx Queues */ 1875 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) { 1876 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id]; 1877 1878 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, 1879 bd_tx_q->nb_tx_desc, 1880 rte_eth_dev_socket_id(slave_eth_dev->data->port_id), 1881 &bd_tx_q->tx_conf); 1882 if (errval != 0) { 1883 RTE_BOND_LOG(ERR, 1884 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", 1885 slave_eth_dev->data->port_id, q_id, errval); 1886 return errval; 1887 } 1888 } 1889 1890 if (internals->mode == BONDING_MODE_8023AD && 1891 internals->mode4.dedicated_queues.enabled == 1) { 1892 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev) 1893 != 0) 1894 return errval; 1895 1896 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev, 1897 slave_eth_dev->data->port_id) != 0) { 1898 RTE_BOND_LOG(ERR, 1899 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", 1900 slave_eth_dev->data->port_id, q_id, errval); 1901 return -1; 1902 } 1903 1904 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL) 1905 rte_flow_destroy(slave_eth_dev->data->port_id, 1906 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id], 1907 &flow_error); 1908 1909 bond_ethdev_8023ad_flow_set(bonded_eth_dev, 1910 slave_eth_dev->data->port_id); 1911 } 1912 1913 /* Start device */ 1914 errval = rte_eth_dev_start(slave_eth_dev->data->port_id); 1915 if (errval != 0) { 1916 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)", 1917 slave_eth_dev->data->port_id, errval); 1918 return -1; 1919 } 1920 1921 /* If RSS is enabled for bonding, synchronize RETA */ 1922 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) { 1923 int i; 1924 struct bond_dev_private *internals; 1925 1926 internals = bonded_eth_dev->data->dev_private; 1927 1928 for (i = 0; i < internals->slave_count; i++) { 1929 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) { 1930 errval = rte_eth_dev_rss_reta_update( 1931 slave_eth_dev->data->port_id, 1932 &internals->reta_conf[0], 1933 internals->slaves[i].reta_size); 1934 if (errval != 0) { 1935 RTE_LOG(WARNING, PMD, 1936 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)." 
1937 " RSS Configuration for bonding may be inconsistent.\n", 1938 slave_eth_dev->data->port_id, errval); 1939 } 1940 break; 1941 } 1942 } 1943 } 1944 1945 /* If lsc interrupt is set, check initial slave's link status */ 1946 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) { 1947 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0); 1948 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id, 1949 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id, 1950 NULL); 1951 } 1952 1953 return 0; 1954 } 1955 1956 void 1957 slave_remove(struct bond_dev_private *internals, 1958 struct rte_eth_dev *slave_eth_dev) 1959 { 1960 uint8_t i; 1961 1962 for (i = 0; i < internals->slave_count; i++) 1963 if (internals->slaves[i].port_id == 1964 slave_eth_dev->data->port_id) 1965 break; 1966 1967 if (i < (internals->slave_count - 1)) { 1968 struct rte_flow *flow; 1969 1970 memmove(&internals->slaves[i], &internals->slaves[i + 1], 1971 sizeof(internals->slaves[0]) * 1972 (internals->slave_count - i - 1)); 1973 TAILQ_FOREACH(flow, &internals->flow_list, next) { 1974 memmove(&flow->flows[i], &flow->flows[i + 1], 1975 sizeof(flow->flows[0]) * 1976 (internals->slave_count - i - 1)); 1977 flow->flows[internals->slave_count - 1] = NULL; 1978 } 1979 } 1980 1981 internals->slave_count--; 1982 1983 /* force reconfiguration of slave interfaces */ 1984 _rte_eth_dev_reset(slave_eth_dev); 1985 } 1986 1987 static void 1988 bond_ethdev_slave_link_status_change_monitor(void *cb_arg); 1989 1990 void 1991 slave_add(struct bond_dev_private *internals, 1992 struct rte_eth_dev *slave_eth_dev) 1993 { 1994 struct bond_slave_details *slave_details = 1995 &internals->slaves[internals->slave_count]; 1996 1997 slave_details->port_id = slave_eth_dev->data->port_id; 1998 slave_details->last_link_status = 0; 1999 2000 /* Mark slave devices that don't support interrupts so we can 2001 * compensate when we start the bond 2002 */ 2003 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) { 2004 slave_details->link_status_poll_enabled = 1; 2005 } 2006 2007 slave_details->link_status_wait_to_complete = 0; 2008 /* clean tlb_last_obytes when adding port for bonding device */ 2009 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs, 2010 sizeof(struct ether_addr)); 2011 } 2012 2013 void 2014 bond_ethdev_primary_set(struct bond_dev_private *internals, 2015 uint16_t slave_port_id) 2016 { 2017 int i; 2018 2019 if (internals->active_slave_count < 1) 2020 internals->current_primary_port = slave_port_id; 2021 else 2022 /* Search bonded device slave ports for new proposed primary port */ 2023 for (i = 0; i < internals->active_slave_count; i++) { 2024 if (internals->active_slaves[i] == slave_port_id) 2025 internals->current_primary_port = slave_port_id; 2026 } 2027 } 2028 2029 static void 2030 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev); 2031 2032 static int 2033 bond_ethdev_start(struct rte_eth_dev *eth_dev) 2034 { 2035 struct bond_dev_private *internals; 2036 int i; 2037 2038 /* slave eth dev will be started by bonded device */ 2039 if (check_for_bonded_ethdev(eth_dev)) { 2040 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)", 2041 eth_dev->data->port_id); 2042 return -1; 2043 } 2044 2045 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; 2046 eth_dev->data->dev_started = 1; 2047 2048 internals = eth_dev->data->dev_private; 2049 2050 if (internals->slave_count == 0) { 2051 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices"); 2052 goto out_err; 
2053 } 2054 2055 if (internals->user_defined_mac == 0) { 2056 struct ether_addr *new_mac_addr = NULL; 2057 2058 for (i = 0; i < internals->slave_count; i++) 2059 if (internals->slaves[i].port_id == internals->primary_port) 2060 new_mac_addr = &internals->slaves[i].persisted_mac_addr; 2061 2062 if (new_mac_addr == NULL) 2063 goto out_err; 2064 2065 if (mac_address_set(eth_dev, new_mac_addr) != 0) { 2066 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address", 2067 eth_dev->data->port_id); 2068 goto out_err; 2069 } 2070 } 2071 2072 /* Update all slave devices MACs*/ 2073 if (mac_address_slaves_update(eth_dev) != 0) 2074 goto out_err; 2075 2076 /* If bonded device is configure in promiscuous mode then re-apply config */ 2077 if (internals->promiscuous_en) 2078 bond_ethdev_promiscuous_enable(eth_dev); 2079 2080 if (internals->mode == BONDING_MODE_8023AD) { 2081 if (internals->mode4.dedicated_queues.enabled == 1) { 2082 internals->mode4.dedicated_queues.rx_qid = 2083 eth_dev->data->nb_rx_queues; 2084 internals->mode4.dedicated_queues.tx_qid = 2085 eth_dev->data->nb_tx_queues; 2086 } 2087 } 2088 2089 2090 /* Reconfigure each slave device if starting bonded device */ 2091 for (i = 0; i < internals->slave_count; i++) { 2092 struct rte_eth_dev *slave_ethdev = 2093 &(rte_eth_devices[internals->slaves[i].port_id]); 2094 if (slave_configure(eth_dev, slave_ethdev) != 0) { 2095 RTE_BOND_LOG(ERR, 2096 "bonded port (%d) failed to reconfigure slave device (%d)", 2097 eth_dev->data->port_id, 2098 internals->slaves[i].port_id); 2099 goto out_err; 2100 } 2101 /* We will need to poll for link status if any slave doesn't 2102 * support interrupts 2103 */ 2104 if (internals->slaves[i].link_status_poll_enabled) 2105 internals->link_status_polling_enabled = 1; 2106 } 2107 2108 /* start polling if needed */ 2109 if (internals->link_status_polling_enabled) { 2110 rte_eal_alarm_set( 2111 internals->link_status_polling_interval_ms * 1000, 2112 bond_ethdev_slave_link_status_change_monitor, 2113 (void *)&rte_eth_devices[internals->port_id]); 2114 } 2115 2116 if (internals->user_defined_primary_port) 2117 bond_ethdev_primary_set(internals, internals->primary_port); 2118 2119 if (internals->mode == BONDING_MODE_8023AD) 2120 bond_mode_8023ad_start(eth_dev); 2121 2122 if (internals->mode == BONDING_MODE_TLB || 2123 internals->mode == BONDING_MODE_ALB) 2124 bond_tlb_enable(internals); 2125 2126 return 0; 2127 2128 out_err: 2129 eth_dev->data->dev_started = 0; 2130 return -1; 2131 } 2132 2133 static void 2134 bond_ethdev_free_queues(struct rte_eth_dev *dev) 2135 { 2136 uint8_t i; 2137 2138 if (dev->data->rx_queues != NULL) { 2139 for (i = 0; i < dev->data->nb_rx_queues; i++) { 2140 rte_free(dev->data->rx_queues[i]); 2141 dev->data->rx_queues[i] = NULL; 2142 } 2143 dev->data->nb_rx_queues = 0; 2144 } 2145 2146 if (dev->data->tx_queues != NULL) { 2147 for (i = 0; i < dev->data->nb_tx_queues; i++) { 2148 rte_free(dev->data->tx_queues[i]); 2149 dev->data->tx_queues[i] = NULL; 2150 } 2151 dev->data->nb_tx_queues = 0; 2152 } 2153 } 2154 2155 void 2156 bond_ethdev_stop(struct rte_eth_dev *eth_dev) 2157 { 2158 struct bond_dev_private *internals = eth_dev->data->dev_private; 2159 uint8_t i; 2160 2161 if (internals->mode == BONDING_MODE_8023AD) { 2162 struct port *port; 2163 void *pkt = NULL; 2164 2165 bond_mode_8023ad_stop(eth_dev); 2166 2167 /* Discard all messages to/from mode 4 state machines */ 2168 for (i = 0; i < internals->active_slave_count; i++) { 2169 port = &mode_8023ad_ports[internals->active_slaves[i]]; 2170 
2171 RTE_ASSERT(port->rx_ring != NULL); 2172 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT) 2173 rte_pktmbuf_free(pkt); 2174 2175 RTE_ASSERT(port->tx_ring != NULL); 2176 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT) 2177 rte_pktmbuf_free(pkt); 2178 } 2179 } 2180 2181 if (internals->mode == BONDING_MODE_TLB || 2182 internals->mode == BONDING_MODE_ALB) { 2183 bond_tlb_disable(internals); 2184 for (i = 0; i < internals->active_slave_count; i++) 2185 tlb_last_obytets[internals->active_slaves[i]] = 0; 2186 } 2187 2188 internals->active_slave_count = 0; 2189 internals->link_status_polling_enabled = 0; 2190 for (i = 0; i < internals->slave_count; i++) 2191 internals->slaves[i].last_link_status = 0; 2192 2193 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; 2194 eth_dev->data->dev_started = 0; 2195 } 2196 2197 void 2198 bond_ethdev_close(struct rte_eth_dev *dev) 2199 { 2200 struct bond_dev_private *internals = dev->data->dev_private; 2201 uint8_t bond_port_id = internals->port_id; 2202 int skipped = 0; 2203 struct rte_flow_error ferror; 2204 2205 RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name); 2206 while (internals->slave_count != skipped) { 2207 uint16_t port_id = internals->slaves[skipped].port_id; 2208 2209 rte_eth_dev_stop(port_id); 2210 2211 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) { 2212 RTE_LOG(ERR, EAL, 2213 "Failed to remove port %d from bonded device " 2214 "%s\n", port_id, dev->device->name); 2215 skipped++; 2216 } 2217 } 2218 bond_flow_ops.flush(dev, &ferror); 2219 bond_ethdev_free_queues(dev); 2220 rte_bitmap_reset(internals->vlan_filter_bmp); 2221 } 2222 2223 /* forward declaration */ 2224 static int bond_ethdev_configure(struct rte_eth_dev *dev); 2225 2226 static void 2227 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) 2228 { 2229 struct bond_dev_private *internals = dev->data->dev_private; 2230 2231 uint16_t max_nb_rx_queues = UINT16_MAX; 2232 uint16_t max_nb_tx_queues = UINT16_MAX; 2233 2234 dev_info->max_mac_addrs = 1; 2235 2236 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ? 2237 internals->candidate_max_rx_pktlen : 2238 ETHER_MAX_JUMBO_FRAME_LEN; 2239 2240 /* Max number of tx/rx queues that the bonded device can support is the 2241 * minimum values of the bonded slaves, as all slaves must be capable 2242 * of supporting the same number of tx/rx queues. 2243 */ 2244 if (internals->slave_count > 0) { 2245 struct rte_eth_dev_info slave_info; 2246 uint8_t idx; 2247 2248 for (idx = 0; idx < internals->slave_count; idx++) { 2249 rte_eth_dev_info_get(internals->slaves[idx].port_id, 2250 &slave_info); 2251 2252 if (slave_info.max_rx_queues < max_nb_rx_queues) 2253 max_nb_rx_queues = slave_info.max_rx_queues; 2254 2255 if (slave_info.max_tx_queues < max_nb_tx_queues) 2256 max_nb_tx_queues = slave_info.max_tx_queues; 2257 } 2258 } 2259 2260 dev_info->max_rx_queues = max_nb_rx_queues; 2261 dev_info->max_tx_queues = max_nb_tx_queues; 2262 2263 /** 2264 * If dedicated hw queues enabled for link bonding device in LACP mode 2265 * then we need to reduce the maximum number of data path queues by 1. 
2266 */ 2267 if (internals->mode == BONDING_MODE_8023AD && 2268 internals->mode4.dedicated_queues.enabled == 1) { 2269 dev_info->max_rx_queues--; 2270 dev_info->max_tx_queues--; 2271 } 2272 2273 dev_info->min_rx_bufsize = 0; 2274 2275 dev_info->rx_offload_capa = internals->rx_offload_capa; 2276 dev_info->tx_offload_capa = internals->tx_offload_capa; 2277 dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa; 2278 dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa; 2279 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads; 2280 2281 dev_info->reta_size = internals->reta_size; 2282 } 2283 2284 static int 2285 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) 2286 { 2287 int res; 2288 uint16_t i; 2289 struct bond_dev_private *internals = dev->data->dev_private; 2290 2291 /* don't do this while a slave is being added */ 2292 rte_spinlock_lock(&internals->lock); 2293 2294 if (on) 2295 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id); 2296 else 2297 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id); 2298 2299 for (i = 0; i < internals->slave_count; i++) { 2300 uint16_t port_id = internals->slaves[i].port_id; 2301 2302 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on); 2303 if (res == ENOTSUP) 2304 RTE_LOG(WARNING, PMD, 2305 "Setting VLAN filter on slave port %u not supported.\n", 2306 port_id); 2307 } 2308 2309 rte_spinlock_unlock(&internals->lock); 2310 return 0; 2311 } 2312 2313 static int 2314 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, 2315 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused, 2316 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool) 2317 { 2318 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *) 2319 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue), 2320 0, dev->data->numa_node); 2321 if (bd_rx_q == NULL) 2322 return -1; 2323 2324 bd_rx_q->queue_id = rx_queue_id; 2325 bd_rx_q->dev_private = dev->data->dev_private; 2326 2327 bd_rx_q->nb_rx_desc = nb_rx_desc; 2328 2329 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf)); 2330 bd_rx_q->mb_pool = mb_pool; 2331 2332 dev->data->rx_queues[rx_queue_id] = bd_rx_q; 2333 2334 return 0; 2335 } 2336 2337 static int 2338 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, 2339 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused, 2340 const struct rte_eth_txconf *tx_conf) 2341 { 2342 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *) 2343 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue), 2344 0, dev->data->numa_node); 2345 2346 if (bd_tx_q == NULL) 2347 return -1; 2348 2349 bd_tx_q->queue_id = tx_queue_id; 2350 bd_tx_q->dev_private = dev->data->dev_private; 2351 2352 bd_tx_q->nb_tx_desc = nb_tx_desc; 2353 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf)); 2354 2355 dev->data->tx_queues[tx_queue_id] = bd_tx_q; 2356 2357 return 0; 2358 } 2359 2360 static void 2361 bond_ethdev_rx_queue_release(void *queue) 2362 { 2363 if (queue == NULL) 2364 return; 2365 2366 rte_free(queue); 2367 } 2368 2369 static void 2370 bond_ethdev_tx_queue_release(void *queue) 2371 { 2372 if (queue == NULL) 2373 return; 2374 2375 rte_free(queue); 2376 } 2377 2378 static void 2379 bond_ethdev_slave_link_status_change_monitor(void *cb_arg) 2380 { 2381 struct rte_eth_dev *bonded_ethdev, *slave_ethdev; 2382 struct bond_dev_private *internals; 2383 2384 /* Default value for polling slave found is true as we don't want to 2385 * disable the polling thread if we cannot 
 * get the lock */
    int i, polling_slave_found = 1;

    if (cb_arg == NULL)
        return;

    bonded_ethdev = (struct rte_eth_dev *)cb_arg;
    internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

    if (!bonded_ethdev->data->dev_started ||
            !internals->link_status_polling_enabled)
        return;

    /* If device is currently being configured then don't check slave link
     * status, wait until next period */
    if (rte_spinlock_trylock(&internals->lock)) {
        if (internals->slave_count > 0)
            polling_slave_found = 0;

        for (i = 0; i < internals->slave_count; i++) {
            if (!internals->slaves[i].link_status_poll_enabled)
                continue;

            slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
            polling_slave_found = 1;

            /* Update slave link status */
            (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
                    internals->slaves[i].link_status_wait_to_complete);

            /* if link status has changed since last checked then call lsc
             * event callback */
            if (slave_ethdev->data->dev_link.link_status !=
                    internals->slaves[i].last_link_status) {
                internals->slaves[i].last_link_status =
                        slave_ethdev->data->dev_link.link_status;

                bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
                        RTE_ETH_EVENT_INTR_LSC,
                        &bonded_ethdev->data->port_id,
                        NULL);
            }
        }
        rte_spinlock_unlock(&internals->lock);
    }

    if (polling_slave_found)
        /* Set alarm to continue monitoring link status of slave ethdevs */
        rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
                bond_ethdev_slave_link_status_change_monitor, cb_arg);
}

static int
bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
{
    void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);

    struct bond_dev_private *bond_ctx;
    struct rte_eth_link slave_link;

    uint32_t idx;

    bond_ctx = ethdev->data->dev_private;

    ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

    if (ethdev->data->dev_started == 0 ||
            bond_ctx->active_slave_count == 0) {
        ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
        return 0;
    }

    ethdev->data->dev_link.link_status = ETH_LINK_UP;

    if (wait_to_complete)
        link_update = rte_eth_link_get;
    else
        link_update = rte_eth_link_get_nowait;

    switch (bond_ctx->mode) {
    case BONDING_MODE_BROADCAST:
        /**
         * Setting link speed to UINT32_MAX to ensure we pick up the
         * value of the first active slave
         */
        ethdev->data->dev_link.link_speed = UINT32_MAX;

        /**
         * link speed is the minimum value of all the slaves' link speeds,
         * as packet loss will occur on a slave if transmission at rates
         * greater than this is attempted
         */
        for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
            link_update(bond_ctx->active_slaves[idx], &slave_link);

            if (slave_link.link_speed <
                    ethdev->data->dev_link.link_speed)
                ethdev->data->dev_link.link_speed =
                        slave_link.link_speed;
        }
        break;
    case BONDING_MODE_ACTIVE_BACKUP:
        /* Current primary slave */
        link_update(bond_ctx->current_primary_port, &slave_link);

        ethdev->data->dev_link.link_speed = slave_link.link_speed;
        break;
    case BONDING_MODE_8023AD:
        ethdev->data->dev_link.link_autoneg =
                bond_ctx->mode4.slave_link.link_autoneg;
        ethdev->data->dev_link.link_duplex =
                bond_ctx->mode4.slave_link.link_duplex;
        /* fall through to update link speed */
    case BONDING_MODE_ROUND_ROBIN:
    case BONDING_MODE_BALANCE:
    case BONDING_MODE_TLB:
    case BONDING_MODE_ALB:
    default:
        /**
         * In these modes the maximum theoretical link speed is the sum
         * of all the slaves
         */
        ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

        for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
            link_update(bond_ctx->active_slaves[idx], &slave_link);

            ethdev->data->dev_link.link_speed +=
                    slave_link.link_speed;
        }
    }


    return 0;
}


static int
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
    struct bond_dev_private *internals = dev->data->dev_private;
    struct rte_eth_stats slave_stats;
    int i, j;

    for (i = 0; i < internals->slave_count; i++) {
        rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

        stats->ipackets += slave_stats.ipackets;
        stats->opackets += slave_stats.opackets;
        stats->ibytes += slave_stats.ibytes;
        stats->obytes += slave_stats.obytes;
        stats->imissed += slave_stats.imissed;
        stats->ierrors += slave_stats.ierrors;
        stats->oerrors += slave_stats.oerrors;
        stats->rx_nombuf += slave_stats.rx_nombuf;

        for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
            stats->q_ipackets[j] += slave_stats.q_ipackets[j];
            stats->q_opackets[j] += slave_stats.q_opackets[j];
            stats->q_ibytes[j] += slave_stats.q_ibytes[j];
            stats->q_obytes[j] += slave_stats.q_obytes[j];
            stats->q_errors[j] += slave_stats.q_errors[j];
        }

    }

    return 0;
}

static void
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
    struct bond_dev_private *internals = dev->data->dev_private;
    int i;

    for (i = 0; i < internals->slave_count; i++)
        rte_eth_stats_reset(internals->slaves[i].port_id);
}

static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
    struct bond_dev_private *internals = eth_dev->data->dev_private;
    int i;

    internals->promiscuous_en = 1;

    switch (internals->mode) {
    /* Promiscuous mode is propagated to all slaves */
    case BONDING_MODE_ROUND_ROBIN:
    case BONDING_MODE_BALANCE:
    case BONDING_MODE_BROADCAST:
        for (i = 0; i < internals->slave_count; i++)
            rte_eth_promiscuous_enable(internals->slaves[i].port_id);
        break;
    /* In mode4 promiscuous mode is managed when slave is added/removed */
    case BONDING_MODE_8023AD:
        break;
    /* Promiscuous mode is propagated only to primary slave */
    case BONDING_MODE_ACTIVE_BACKUP:
    case BONDING_MODE_TLB:
    case BONDING_MODE_ALB:
    default:
        rte_eth_promiscuous_enable(internals->current_primary_port);
    }
}

static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
    struct bond_dev_private *internals = dev->data->dev_private;
    int i;

    internals->promiscuous_en = 0;

    switch (internals->mode) {
    /* Promiscuous mode is propagated to all slaves */
    case BONDING_MODE_ROUND_ROBIN:
    case BONDING_MODE_BALANCE:
    case BONDING_MODE_BROADCAST:
        for (i = 0; i < internals->slave_count; i++)
            rte_eth_promiscuous_disable(internals->slaves[i].port_id);
        break;
    /* In mode4 promiscuous mode is managed when
slave is added/removed */ 2609 case BONDING_MODE_8023AD: 2610 break; 2611 /* Promiscuous mode is propagated only to primary slave */ 2612 case BONDING_MODE_ACTIVE_BACKUP: 2613 case BONDING_MODE_TLB: 2614 case BONDING_MODE_ALB: 2615 default: 2616 rte_eth_promiscuous_disable(internals->current_primary_port); 2617 } 2618 } 2619 2620 static void 2621 bond_ethdev_delayed_lsc_propagation(void *arg) 2622 { 2623 if (arg == NULL) 2624 return; 2625 2626 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg, 2627 RTE_ETH_EVENT_INTR_LSC, NULL); 2628 } 2629 2630 int 2631 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type, 2632 void *param, void *ret_param __rte_unused) 2633 { 2634 struct rte_eth_dev *bonded_eth_dev; 2635 struct bond_dev_private *internals; 2636 struct rte_eth_link link; 2637 int rc = -1; 2638 2639 int i, valid_slave = 0; 2640 uint8_t active_pos; 2641 uint8_t lsc_flag = 0; 2642 2643 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL) 2644 return rc; 2645 2646 bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param]; 2647 2648 if (check_for_bonded_ethdev(bonded_eth_dev)) 2649 return rc; 2650 2651 internals = bonded_eth_dev->data->dev_private; 2652 2653 /* If the device isn't started don't handle interrupts */ 2654 if (!bonded_eth_dev->data->dev_started) 2655 return rc; 2656 2657 /* verify that port_id is a valid slave of bonded port */ 2658 for (i = 0; i < internals->slave_count; i++) { 2659 if (internals->slaves[i].port_id == port_id) { 2660 valid_slave = 1; 2661 break; 2662 } 2663 } 2664 2665 if (!valid_slave) 2666 return rc; 2667 2668 /* Search for port in active port list */ 2669 active_pos = find_slave_by_id(internals->active_slaves, 2670 internals->active_slave_count, port_id); 2671 2672 rte_eth_link_get_nowait(port_id, &link); 2673 if (link.link_status) { 2674 if (active_pos < internals->active_slave_count) 2675 return rc; 2676 2677 /* if no active slave ports then set this port to be primary port */ 2678 if (internals->active_slave_count < 1) { 2679 /* If first active slave, then change link status */ 2680 bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP; 2681 internals->current_primary_port = port_id; 2682 lsc_flag = 1; 2683 2684 mac_address_slaves_update(bonded_eth_dev); 2685 } 2686 2687 activate_slave(bonded_eth_dev, port_id); 2688 2689 /* If user has defined the primary port then default to using it */ 2690 if (internals->user_defined_primary_port && 2691 internals->primary_port == port_id) 2692 bond_ethdev_primary_set(internals, port_id); 2693 } else { 2694 if (active_pos == internals->active_slave_count) 2695 return rc; 2696 2697 /* Remove from active slave list */ 2698 deactivate_slave(bonded_eth_dev, port_id); 2699 2700 if (internals->active_slave_count < 1) 2701 lsc_flag = 1; 2702 2703 /* Update primary id, take first active slave from list or if none 2704 * available set to -1 */ 2705 if (port_id == internals->current_primary_port) { 2706 if (internals->active_slave_count > 0) 2707 bond_ethdev_primary_set(internals, 2708 internals->active_slaves[0]); 2709 else 2710 internals->current_primary_port = internals->primary_port; 2711 } 2712 } 2713 2714 /** 2715 * Update bonded device link properties after any change to active 2716 * slaves 2717 */ 2718 bond_ethdev_link_update(bonded_eth_dev, 0); 2719 2720 if (lsc_flag) { 2721 /* Cancel any possible outstanding interrupts if delays are enabled */ 2722 if (internals->link_up_delay_ms > 0 || 2723 internals->link_down_delay_ms > 0) 2724 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation, 
2725 bonded_eth_dev); 2726 2727 if (bonded_eth_dev->data->dev_link.link_status) { 2728 if (internals->link_up_delay_ms > 0) 2729 rte_eal_alarm_set(internals->link_up_delay_ms * 1000, 2730 bond_ethdev_delayed_lsc_propagation, 2731 (void *)bonded_eth_dev); 2732 else 2733 _rte_eth_dev_callback_process(bonded_eth_dev, 2734 RTE_ETH_EVENT_INTR_LSC, 2735 NULL); 2736 2737 } else { 2738 if (internals->link_down_delay_ms > 0) 2739 rte_eal_alarm_set(internals->link_down_delay_ms * 1000, 2740 bond_ethdev_delayed_lsc_propagation, 2741 (void *)bonded_eth_dev); 2742 else 2743 _rte_eth_dev_callback_process(bonded_eth_dev, 2744 RTE_ETH_EVENT_INTR_LSC, 2745 NULL); 2746 } 2747 } 2748 return 0; 2749 } 2750 2751 static int 2752 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev, 2753 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) 2754 { 2755 unsigned i, j; 2756 int result = 0; 2757 int slave_reta_size; 2758 unsigned reta_count; 2759 struct bond_dev_private *internals = dev->data->dev_private; 2760 2761 if (reta_size != internals->reta_size) 2762 return -EINVAL; 2763 2764 /* Copy RETA table */ 2765 reta_count = reta_size / RTE_RETA_GROUP_SIZE; 2766 2767 for (i = 0; i < reta_count; i++) { 2768 internals->reta_conf[i].mask = reta_conf[i].mask; 2769 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) 2770 if ((reta_conf[i].mask >> j) & 0x01) 2771 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j]; 2772 } 2773 2774 /* Fill rest of array */ 2775 for (; i < RTE_DIM(internals->reta_conf); i += reta_count) 2776 memcpy(&internals->reta_conf[i], &internals->reta_conf[0], 2777 sizeof(internals->reta_conf[0]) * reta_count); 2778 2779 /* Propagate RETA over slaves */ 2780 for (i = 0; i < internals->slave_count; i++) { 2781 slave_reta_size = internals->slaves[i].reta_size; 2782 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id, 2783 &internals->reta_conf[0], slave_reta_size); 2784 if (result < 0) 2785 return result; 2786 } 2787 2788 return 0; 2789 } 2790 2791 static int 2792 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev, 2793 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) 2794 { 2795 int i, j; 2796 struct bond_dev_private *internals = dev->data->dev_private; 2797 2798 if (reta_size != internals->reta_size) 2799 return -EINVAL; 2800 2801 /* Copy RETA table */ 2802 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++) 2803 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) 2804 if ((reta_conf[i].mask >> j) & 0x01) 2805 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j]; 2806 2807 return 0; 2808 } 2809 2810 static int 2811 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev, 2812 struct rte_eth_rss_conf *rss_conf) 2813 { 2814 int i, result = 0; 2815 struct bond_dev_private *internals = dev->data->dev_private; 2816 struct rte_eth_rss_conf bond_rss_conf; 2817 2818 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf)); 2819 2820 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads; 2821 2822 if (bond_rss_conf.rss_hf != 0) 2823 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf; 2824 2825 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len < 2826 sizeof(internals->rss_key)) { 2827 if (bond_rss_conf.rss_key_len == 0) 2828 bond_rss_conf.rss_key_len = 40; 2829 internals->rss_key_len = bond_rss_conf.rss_key_len; 2830 memcpy(internals->rss_key, bond_rss_conf.rss_key, 2831 internals->rss_key_len); 2832 } 2833 2834 for (i = 0; i < internals->slave_count; i++) { 2835 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id, 2836 &bond_rss_conf); 
2837 if (result < 0) 2838 return result; 2839 } 2840 2841 return 0; 2842 } 2843 2844 static int 2845 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev, 2846 struct rte_eth_rss_conf *rss_conf) 2847 { 2848 struct bond_dev_private *internals = dev->data->dev_private; 2849 2850 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 2851 rss_conf->rss_key_len = internals->rss_key_len; 2852 if (rss_conf->rss_key) 2853 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len); 2854 2855 return 0; 2856 } 2857 2858 static int 2859 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 2860 { 2861 struct rte_eth_dev *slave_eth_dev; 2862 struct bond_dev_private *internals = dev->data->dev_private; 2863 int ret, i; 2864 2865 rte_spinlock_lock(&internals->lock); 2866 2867 for (i = 0; i < internals->slave_count; i++) { 2868 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id]; 2869 if (*slave_eth_dev->dev_ops->mtu_set == NULL) { 2870 rte_spinlock_unlock(&internals->lock); 2871 return -ENOTSUP; 2872 } 2873 } 2874 for (i = 0; i < internals->slave_count; i++) { 2875 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu); 2876 if (ret < 0) { 2877 rte_spinlock_unlock(&internals->lock); 2878 return ret; 2879 } 2880 } 2881 2882 rte_spinlock_unlock(&internals->lock); 2883 return 0; 2884 } 2885 2886 static int 2887 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr) 2888 { 2889 if (mac_address_set(dev, addr)) { 2890 RTE_BOND_LOG(ERR, "Failed to update MAC address"); 2891 return -EINVAL; 2892 } 2893 2894 return 0; 2895 } 2896 2897 static int 2898 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused, 2899 enum rte_filter_type type, enum rte_filter_op op, void *arg) 2900 { 2901 if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) { 2902 *(const void **)arg = &bond_flow_ops; 2903 return 0; 2904 } 2905 return -ENOTSUP; 2906 } 2907 2908 const struct eth_dev_ops default_dev_ops = { 2909 .dev_start = bond_ethdev_start, 2910 .dev_stop = bond_ethdev_stop, 2911 .dev_close = bond_ethdev_close, 2912 .dev_configure = bond_ethdev_configure, 2913 .dev_infos_get = bond_ethdev_info, 2914 .vlan_filter_set = bond_ethdev_vlan_filter_set, 2915 .rx_queue_setup = bond_ethdev_rx_queue_setup, 2916 .tx_queue_setup = bond_ethdev_tx_queue_setup, 2917 .rx_queue_release = bond_ethdev_rx_queue_release, 2918 .tx_queue_release = bond_ethdev_tx_queue_release, 2919 .link_update = bond_ethdev_link_update, 2920 .stats_get = bond_ethdev_stats_get, 2921 .stats_reset = bond_ethdev_stats_reset, 2922 .promiscuous_enable = bond_ethdev_promiscuous_enable, 2923 .promiscuous_disable = bond_ethdev_promiscuous_disable, 2924 .reta_update = bond_ethdev_rss_reta_update, 2925 .reta_query = bond_ethdev_rss_reta_query, 2926 .rss_hash_update = bond_ethdev_rss_hash_update, 2927 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get, 2928 .mtu_set = bond_ethdev_mtu_set, 2929 .mac_addr_set = bond_ethdev_mac_address_set, 2930 .filter_ctrl = bond_filter_ctrl 2931 }; 2932 2933 static int 2934 bond_alloc(struct rte_vdev_device *dev, uint8_t mode) 2935 { 2936 const char *name = rte_vdev_device_name(dev); 2937 uint8_t socket_id = dev->device.numa_node; 2938 struct bond_dev_private *internals = NULL; 2939 struct rte_eth_dev *eth_dev = NULL; 2940 uint32_t vlan_filter_bmp_size; 2941 2942 /* now do all data allocation - for eth_dev structure, dummy pci driver 2943 * and internal (private) data 2944 */ 2945 2946 /* reserve an ethdev entry */ 2947 eth_dev = rte_eth_vdev_allocate(dev, 
sizeof(*internals)); 2948 if (eth_dev == NULL) { 2949 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev"); 2950 goto err; 2951 } 2952 2953 internals = eth_dev->data->dev_private; 2954 eth_dev->data->nb_rx_queues = (uint16_t)1; 2955 eth_dev->data->nb_tx_queues = (uint16_t)1; 2956 2957 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0, 2958 socket_id); 2959 if (eth_dev->data->mac_addrs == NULL) { 2960 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs"); 2961 goto err; 2962 } 2963 2964 eth_dev->dev_ops = &default_dev_ops; 2965 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC; 2966 2967 rte_spinlock_init(&internals->lock); 2968 2969 internals->port_id = eth_dev->data->port_id; 2970 internals->mode = BONDING_MODE_INVALID; 2971 internals->current_primary_port = RTE_MAX_ETHPORTS + 1; 2972 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; 2973 internals->burst_xmit_hash = burst_xmit_l2_hash; 2974 internals->user_defined_mac = 0; 2975 2976 internals->link_status_polling_enabled = 0; 2977 2978 internals->link_status_polling_interval_ms = 2979 DEFAULT_POLLING_INTERVAL_10_MS; 2980 internals->link_down_delay_ms = 0; 2981 internals->link_up_delay_ms = 0; 2982 2983 internals->slave_count = 0; 2984 internals->active_slave_count = 0; 2985 internals->rx_offload_capa = 0; 2986 internals->tx_offload_capa = 0; 2987 internals->rx_queue_offload_capa = 0; 2988 internals->tx_queue_offload_capa = 0; 2989 internals->candidate_max_rx_pktlen = 0; 2990 internals->max_rx_pktlen = 0; 2991 2992 /* Initially allow to choose any offload type */ 2993 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK; 2994 2995 memset(internals->active_slaves, 0, sizeof(internals->active_slaves)); 2996 memset(internals->slaves, 0, sizeof(internals->slaves)); 2997 2998 TAILQ_INIT(&internals->flow_list); 2999 internals->flow_isolated_valid = 0; 3000 3001 /* Set mode 4 default configuration */ 3002 bond_mode_8023ad_setup(eth_dev, NULL); 3003 if (bond_ethdev_mode_set(eth_dev, mode)) { 3004 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d\n", 3005 eth_dev->data->port_id, mode); 3006 goto err; 3007 } 3008 3009 vlan_filter_bmp_size = 3010 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1); 3011 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size, 3012 RTE_CACHE_LINE_SIZE); 3013 if (internals->vlan_filter_bmpmem == NULL) { 3014 RTE_BOND_LOG(ERR, 3015 "Failed to allocate vlan bitmap for bonded device %u\n", 3016 eth_dev->data->port_id); 3017 goto err; 3018 } 3019 3020 internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1, 3021 internals->vlan_filter_bmpmem, vlan_filter_bmp_size); 3022 if (internals->vlan_filter_bmp == NULL) { 3023 RTE_BOND_LOG(ERR, 3024 "Failed to init vlan bitmap for bonded device %u\n", 3025 eth_dev->data->port_id); 3026 rte_free(internals->vlan_filter_bmpmem); 3027 goto err; 3028 } 3029 3030 return eth_dev->data->port_id; 3031 3032 err: 3033 rte_free(internals); 3034 if (eth_dev != NULL) { 3035 rte_free(eth_dev->data->mac_addrs); 3036 rte_eth_dev_release_port(eth_dev); 3037 } 3038 return -1; 3039 } 3040 3041 static int 3042 bond_probe(struct rte_vdev_device *dev) 3043 { 3044 const char *name; 3045 struct bond_dev_private *internals; 3046 struct rte_kvargs *kvlist; 3047 uint8_t bonding_mode, socket_id/*, agg_mode*/; 3048 int arg_count, port_id; 3049 uint8_t agg_mode; 3050 struct rte_eth_dev *eth_dev; 3051 3052 if (!dev) 3053 return -EINVAL; 3054 3055 name = rte_vdev_device_name(dev); 3056 RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name); 3057 
3058 if (rte_eal_process_type() == RTE_PROC_SECONDARY && 3059 strlen(rte_vdev_device_args(dev)) == 0) { 3060 eth_dev = rte_eth_dev_attach_secondary(name); 3061 if (!eth_dev) { 3062 RTE_LOG(ERR, PMD, "Failed to probe %s\n", name); 3063 return -1; 3064 } 3065 /* TODO: request info from primary to set up Rx and Tx */ 3066 eth_dev->dev_ops = &default_dev_ops; 3067 return 0; 3068 } 3069 3070 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), 3071 pmd_bond_init_valid_arguments); 3072 if (kvlist == NULL) 3073 return -1; 3074 3075 /* Parse link bonding mode */ 3076 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) { 3077 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG, 3078 &bond_ethdev_parse_slave_mode_kvarg, 3079 &bonding_mode) != 0) { 3080 RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n", 3081 name); 3082 goto parse_error; 3083 } 3084 } else { 3085 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded " 3086 "device %s\n", name); 3087 goto parse_error; 3088 } 3089 3090 /* Parse socket id to create bonding device on */ 3091 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG); 3092 if (arg_count == 1) { 3093 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG, 3094 &bond_ethdev_parse_socket_id_kvarg, &socket_id) 3095 != 0) { 3096 RTE_LOG(ERR, EAL, "Invalid socket Id specified for " 3097 "bonded device %s\n", name); 3098 goto parse_error; 3099 } 3100 } else if (arg_count > 1) { 3101 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for " 3102 "bonded device %s\n", name); 3103 goto parse_error; 3104 } else { 3105 socket_id = rte_socket_id(); 3106 } 3107 3108 dev->device.numa_node = socket_id; 3109 3110 /* Create link bonding eth device */ 3111 port_id = bond_alloc(dev, bonding_mode); 3112 if (port_id < 0) { 3113 RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on " 3114 "socket %u.\n", name, bonding_mode, socket_id); 3115 goto parse_error; 3116 } 3117 internals = rte_eth_devices[port_id].data->dev_private; 3118 internals->kvlist = kvlist; 3119 3120 3121 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) { 3122 if (rte_kvargs_process(kvlist, 3123 PMD_BOND_AGG_MODE_KVARG, 3124 &bond_ethdev_parse_slave_agg_mode_kvarg, 3125 &agg_mode) != 0) { 3126 RTE_LOG(ERR, EAL, 3127 "Failed to parse agg selection mode for bonded device %s\n", 3128 name); 3129 goto parse_error; 3130 } 3131 3132 if (internals->mode == BONDING_MODE_8023AD) 3133 rte_eth_bond_8023ad_agg_selection_set(port_id, 3134 agg_mode); 3135 } else { 3136 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE); 3137 } 3138 3139 RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on " 3140 "socket %u.\n", name, port_id, bonding_mode, socket_id); 3141 return 0; 3142 3143 parse_error: 3144 rte_kvargs_free(kvlist); 3145 3146 return -1; 3147 } 3148 3149 static int 3150 bond_remove(struct rte_vdev_device *dev) 3151 { 3152 struct rte_eth_dev *eth_dev; 3153 struct bond_dev_private *internals; 3154 const char *name; 3155 3156 if (!dev) 3157 return -EINVAL; 3158 3159 name = rte_vdev_device_name(dev); 3160 RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name); 3161 3162 /* now free all data allocation - for eth_dev structure, 3163 * dummy pci driver and internal (private) data 3164 */ 3165 3166 /* find an ethdev entry */ 3167 eth_dev = rte_eth_dev_allocated(name); 3168 if (eth_dev == NULL) 3169 return -ENODEV; 3170 3171 RTE_ASSERT(eth_dev->device == &dev->device); 3172 3173 internals = eth_dev->data->dev_private; 3174 if (internals->slave_count != 0) 3175 return -EBUSY; 
if (eth_dev->data->dev_started == 1) {
        bond_ethdev_stop(eth_dev);
        bond_ethdev_close(eth_dev);
    }

    eth_dev->dev_ops = NULL;
    eth_dev->rx_pkt_burst = NULL;
    eth_dev->tx_pkt_burst = NULL;

    internals = eth_dev->data->dev_private;
    /* Try to release the mempool used in mode 6 (ALB). If the bonded
     * device is not in mode 6 the pointer is NULL, and freeing a NULL
     * pointer is harmless.
     */
    rte_mempool_free(internals->mode6.mempool);
    rte_bitmap_free(internals->vlan_filter_bmp);
    rte_free(internals->vlan_filter_bmpmem);
    rte_free(eth_dev->data->dev_private);
    rte_free(eth_dev->data->mac_addrs);

    rte_eth_dev_release_port(eth_dev);

    return 0;
}

/* This part resolves the slave port ids after all the other pdevs and vdevs
 * have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
    const char *name = dev->device->name;
    struct bond_dev_private *internals = dev->data->dev_private;
    struct rte_kvargs *kvlist = internals->kvlist;
    int arg_count;
    uint16_t port_id = dev - rte_eth_devices;
    uint8_t agg_mode;

    static const uint8_t default_rss_key[40] = {
        0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
        0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
        0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
        0xBE, 0xAC, 0x01, 0xFA
    };

    unsigned i, j;

    /* If RSS is enabled, fill table and key with default values */
    if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
        dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
        dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
        memcpy(internals->rss_key, default_rss_key, 40);

        for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
            internals->reta_conf[i].mask = ~0LL;
            for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
                internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
        }
    }

    /* set the max_rx_pktlen */
    internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

    /*
     * If no kvlist, it means that this bonded device has been created
     * through the bonding API.
3241 */ 3242 if (!kvlist) 3243 return 0; 3244 3245 /* Parse MAC address for bonded device */ 3246 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG); 3247 if (arg_count == 1) { 3248 struct ether_addr bond_mac; 3249 3250 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG, 3251 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) { 3252 RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n", 3253 name); 3254 return -1; 3255 } 3256 3257 /* Set MAC address */ 3258 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) { 3259 RTE_LOG(ERR, EAL, 3260 "Failed to set mac address on bonded device %s\n", 3261 name); 3262 return -1; 3263 } 3264 } else if (arg_count > 1) { 3265 RTE_LOG(ERR, EAL, 3266 "MAC address can be specified only once for bonded device %s\n", 3267 name); 3268 return -1; 3269 } 3270 3271 /* Parse/set balance mode transmit policy */ 3272 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG); 3273 if (arg_count == 1) { 3274 uint8_t xmit_policy; 3275 3276 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG, 3277 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) != 3278 0) { 3279 RTE_LOG(INFO, EAL, 3280 "Invalid xmit policy specified for bonded device %s\n", 3281 name); 3282 return -1; 3283 } 3284 3285 /* Set balance mode transmit policy*/ 3286 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) { 3287 RTE_LOG(ERR, EAL, 3288 "Failed to set balance xmit policy on bonded device %s\n", 3289 name); 3290 return -1; 3291 } 3292 } else if (arg_count > 1) { 3293 RTE_LOG(ERR, EAL, 3294 "Transmit policy can be specified only once for bonded device" 3295 " %s\n", name); 3296 return -1; 3297 } 3298 3299 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) { 3300 if (rte_kvargs_process(kvlist, 3301 PMD_BOND_AGG_MODE_KVARG, 3302 &bond_ethdev_parse_slave_agg_mode_kvarg, 3303 &agg_mode) != 0) { 3304 RTE_LOG(ERR, EAL, 3305 "Failed to parse agg selection mode for bonded device %s\n", 3306 name); 3307 } 3308 if (internals->mode == BONDING_MODE_8023AD) 3309 rte_eth_bond_8023ad_agg_selection_set(port_id, 3310 agg_mode); 3311 } 3312 3313 /* Parse/add slave ports to bonded device */ 3314 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) { 3315 struct bond_ethdev_slave_ports slave_ports; 3316 unsigned i; 3317 3318 memset(&slave_ports, 0, sizeof(slave_ports)); 3319 3320 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG, 3321 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) { 3322 RTE_LOG(ERR, EAL, 3323 "Failed to parse slave ports for bonded device %s\n", 3324 name); 3325 return -1; 3326 } 3327 3328 for (i = 0; i < slave_ports.slave_count; i++) { 3329 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) { 3330 RTE_LOG(ERR, EAL, 3331 "Failed to add port %d as slave to bonded device %s\n", 3332 slave_ports.slaves[i], name); 3333 } 3334 } 3335 3336 } else { 3337 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name); 3338 return -1; 3339 } 3340 3341 /* Parse/set primary slave port id*/ 3342 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG); 3343 if (arg_count == 1) { 3344 uint16_t primary_slave_port_id; 3345 3346 if (rte_kvargs_process(kvlist, 3347 PMD_BOND_PRIMARY_SLAVE_KVARG, 3348 &bond_ethdev_parse_primary_slave_port_id_kvarg, 3349 &primary_slave_port_id) < 0) { 3350 RTE_LOG(INFO, EAL, 3351 "Invalid primary slave port id specified for bonded device" 3352 " %s\n", name); 3353 return -1; 3354 } 3355 3356 /* Set balance mode transmit policy*/ 3357 if 
(rte_eth_bond_primary_set(port_id, primary_slave_port_id)
                != 0) {
            RTE_LOG(ERR, EAL,
                "Failed to set primary slave port %d on bonded device %s\n",
                primary_slave_port_id, name);
            return -1;
        }
    } else if (arg_count > 1) {
        RTE_LOG(INFO, EAL,
            "Primary slave can be specified only once for bonded device"
            " %s\n", name);
        return -1;
    }

    /* Parse link status monitor polling interval */
    arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
    if (arg_count == 1) {
        uint32_t lsc_poll_interval_ms;

        if (rte_kvargs_process(kvlist,
                PMD_BOND_LSC_POLL_PERIOD_KVARG,
                &bond_ethdev_parse_time_ms_kvarg,
                &lsc_poll_interval_ms) < 0) {
            RTE_LOG(INFO, EAL,
                "Invalid lsc polling interval value specified for bonded"
                " device %s\n", name);
            return -1;
        }

        if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
                != 0) {
            RTE_LOG(ERR, EAL,
                "Failed to set lsc monitor polling interval (%u ms) on"
                " bonded device %s\n", lsc_poll_interval_ms, name);
            return -1;
        }
    } else if (arg_count > 1) {
        RTE_LOG(INFO, EAL,
            "LSC polling interval can be specified only once for bonded"
            " device %s\n", name);
        return -1;
    }

    /* Parse link up interrupt propagation delay */
    arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
    if (arg_count == 1) {
        uint32_t link_up_delay_ms;

        if (rte_kvargs_process(kvlist,
                PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
                &bond_ethdev_parse_time_ms_kvarg,
                &link_up_delay_ms) < 0) {
            RTE_LOG(INFO, EAL,
                "Invalid link up propagation delay value specified for"
                " bonded device %s\n", name);
            return -1;
        }

        /* Set link up propagation delay */
        if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
                != 0) {
            RTE_LOG(ERR, EAL,
                "Failed to set link up propagation delay (%u ms) on bonded"
                " device %s\n", link_up_delay_ms, name);
            return -1;
        }
    } else if (arg_count > 1) {
        RTE_LOG(INFO, EAL,
            "Link up propagation delay can be specified only once for"
            " bonded device %s\n", name);
        return -1;
    }

    /* Parse link down interrupt propagation delay */
    arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
    if (arg_count == 1) {
        uint32_t link_down_delay_ms;

        if (rte_kvargs_process(kvlist,
                PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
                &bond_ethdev_parse_time_ms_kvarg,
                &link_down_delay_ms) < 0) {
            RTE_LOG(INFO, EAL,
                "Invalid link down propagation delay value specified for"
                " bonded device %s\n", name);
            return -1;
        }

        /* Set link down propagation delay */
        if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
                != 0) {
            RTE_LOG(ERR, EAL,
                "Failed to set link down propagation delay (%u ms) on"
                " bonded device %s\n", link_down_delay_ms, name);
            return -1;
        }
    } else if (arg_count > 1) {
        RTE_LOG(INFO, EAL,
            "Link down propagation delay can be specified only once for"
            " bonded device %s\n", name);
        return -1;
    }

    return 0;
}

struct rte_vdev_driver pmd_bond_drv = {
    .probe = bond_probe,
    .remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
"slave=<ifc> " 3473 "primary=<ifc> " 3474 "mode=[0-6] " 3475 "xmit_policy=[l2 | l23 | l34] " 3476 "agg_mode=[count | stable | bandwidth] " 3477 "socket_id=<int> " 3478 "mac=<mac addr> " 3479 "lsc_poll_period_ms=<int> " 3480 "up_delay=<int> " 3481 "down_delay=<int>"); 3482