/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/param.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_lpm.h>
#include <rte_lpm6.h>
#include <rte_ip.h>
#include <rte_string_fns.h>

#include <rte_ip_frag.h>

#define RTE_LOGTYPE_IP_FRAG RTE_LOGTYPE_USER1

/* allow max jumbo frame 9.5 KB */
#define JUMBO_FRAME_MAX_SIZE	0x2600

#define ROUNDUP_DIV(a, b)	(((a) + (b) - 1) / (b))

/*
 * Default byte size for the IPv4/IPv6 Maximum Transfer Unit (MTU).
 * These values include the size of the IP header.
 */
#define IPV4_MTU_DEFAULT	RTE_ETHER_MTU
#define IPV6_MTU_DEFAULT	RTE_ETHER_MTU

/*
 * The overhead from max frame size to MTU.
 * We have to consider the max possible overhead.
 */
#define MTU_OVERHEAD	\
	(RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + \
		2 * sizeof(struct rte_vlan_hdr))

/*
 * Default payload in bytes for the IPv4/IPv6 packet.
 */
#define IPV4_DEFAULT_PAYLOAD	(IPV4_MTU_DEFAULT - sizeof(struct rte_ipv4_hdr))
#define IPV6_DEFAULT_PAYLOAD	(IPV6_MTU_DEFAULT - sizeof(struct rte_ipv6_hdr))
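
/*
 * For reference (assuming the standard header sizes): with
 * RTE_ETHER_MTU == 1500, a 20-byte IPv4 header and a 40-byte IPv6 header,
 * the defaults above work out to
 *   IPV4_DEFAULT_PAYLOAD = 1500 - 20 = 1480 bytes
 *   IPV6_DEFAULT_PAYLOAD = 1500 - 40 = 1460 bytes
 * and MTU_OVERHEAD = 14 (Ethernet) + 4 (CRC) + 2 * 4 (VLAN) = 26 bytes.
 */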

/*
 * Max number of fragments per packet expected - defined by config file.
 */
#define MAX_PACKET_FRAG RTE_LIBRTE_IP_FRAG_MAX_FRAG

#define NB_MBUF   8192

#define MAX_PKT_BURST	32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET	3

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

/* ethernet addresses of ports */
static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

#ifndef IPv4_BYTES
#define IPv4_BYTES_FMT "%" PRIu8 ".%" PRIu8 ".%" PRIu8 ".%" PRIu8
#define IPv4_BYTES(addr) \
	(uint8_t) (((addr) >> 24) & 0xFF),\
	(uint8_t) (((addr) >> 16) & 0xFF),\
	(uint8_t) (((addr) >> 8) & 0xFF),\
	(uint8_t) ((addr) & 0xFF)
#endif

#ifndef IPv6_BYTES
#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
		"%02x%02x:%02x%02x:%02x%02x:%02x%02x"
#define IPv6_BYTES(addr) \
	addr[0], addr[1], addr[2], addr[3], \
	addr[4], addr[5], addr[6], addr[7], \
	addr[8], addr[9], addr[10], addr[11],\
	addr[12], addr[13], addr[14], addr[15]
#endif

#define IPV6_ADDR_LEN 16

/* mask of enabled ports */
static int enabled_port_mask = 0;

static int rx_queue_per_lcore = 1;

#define MBUF_TABLE_SIZE  (2 * MAX(MAX_PKT_BURST, MAX_PACKET_FRAG))

struct mbuf_table {
	uint16_t len;
	struct rte_mbuf *m_table[MBUF_TABLE_SIZE];
};

struct rx_queue {
	struct rte_mempool *direct_pool;
	struct rte_mempool *indirect_pool;
	struct rte_lpm *lpm;
	struct rte_lpm6 *lpm6;
	uint16_t portid;
};

#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT 16
struct lcore_queue_conf {
	uint16_t n_rx_queue;
	uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
	struct rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
	struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
} __rte_cache_aligned;
struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];

static struct rte_eth_conf port_conf = {
	.rxmode = {
		.max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
		.split_hdr_size = 0,
		.offloads = (DEV_RX_OFFLOAD_CHECKSUM |
			     DEV_RX_OFFLOAD_SCATTER |
			     DEV_RX_OFFLOAD_JUMBO_FRAME),
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
		.offloads = (DEV_TX_OFFLOAD_IPV4_CKSUM |
			     DEV_TX_OFFLOAD_MULTI_SEGS),
	},
};

/*
 * IPv4 forwarding table
 */
struct l3fwd_ipv4_route {
	uint32_t ip;
	uint8_t  depth;
	uint8_t  if_out;
};

struct l3fwd_ipv4_route l3fwd_ipv4_route_array[] = {
	{RTE_IPV4(100,10,0,0), 16, 0},
	{RTE_IPV4(100,20,0,0), 16, 1},
	{RTE_IPV4(100,30,0,0), 16, 2},
	{RTE_IPV4(100,40,0,0), 16, 3},
	{RTE_IPV4(100,50,0,0), 16, 4},
	{RTE_IPV4(100,60,0,0), 16, 5},
	{RTE_IPV4(100,70,0,0), 16, 6},
	{RTE_IPV4(100,80,0,0), 16, 7},
};

/*
 * IPv6 forwarding table
 */

struct l3fwd_ipv6_route {
	uint8_t ip[IPV6_ADDR_LEN];
	uint8_t depth;
	uint8_t if_out;
};

static struct l3fwd_ipv6_route l3fwd_ipv6_route_array[] = {
	{{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0},
	{{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1},
	{{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2},
	{{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3},
	{{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4},
	{{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5},
	{{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6},
	{{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7},
};
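
/*
 * Each route entry above maps a destination prefix to an output port
 * index.  A matching next hop is only used if that port is also set in
 * enabled_port_mask (see l3fwd_simple_forward()); otherwise the packet
 * is sent back out of the port it arrived on.
 */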

#define LPM_MAX_RULES		1024
#define LPM6_MAX_RULES		1024
#define LPM6_NUMBER_TBL8S	(1 << 16)

struct rte_lpm6_config lpm6_config = {
	.max_rules = LPM6_MAX_RULES,
	.number_tbl8s = LPM6_NUMBER_TBL8S,
	.flags = 0
};

static struct rte_mempool *socket_direct_pool[RTE_MAX_NUMA_NODES];
static struct rte_mempool *socket_indirect_pool[RTE_MAX_NUMA_NODES];
static struct rte_lpm *socket_lpm[RTE_MAX_NUMA_NODES];
static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES];

/* Send burst of packets on an output interface */
static inline int
send_burst(struct lcore_queue_conf *qconf, uint16_t n, uint16_t port)
{
	struct rte_mbuf **m_table;
	int ret;
	uint16_t queueid;

	queueid = qconf->tx_queue_id[port];
	m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;

	ret = rte_eth_tx_burst(port, queueid, m_table, n);
	if (unlikely(ret < n)) {
		/* free the mbufs that could not be transmitted */
		do {
			rte_pktmbuf_free(m_table[ret]);
		} while (++ret < n);
	}

	return 0;
}
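
/*
 * Forward (and, if needed, fragment) one input packet:
 *  - strip the Ethernet header and look up the destination address in
 *    the LPM (IPv4) or LPM6 (IPv6) table to pick the output port;
 *  - if the packet fits in the default MTU, queue it as-is, otherwise
 *    fragment it with rte_ipv4_fragment_packet()/rte_ipv6_fragment_packet()
 *    using the direct and indirect mempools and free the original mbuf;
 *  - prepend a fresh Ethernet header to every resulting mbuf and append
 *    it to the per-port TX table, which is flushed once it holds at
 *    least MAX_PKT_BURST packets.
 */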
static inline void
l3fwd_simple_forward(struct rte_mbuf *m, struct lcore_queue_conf *qconf,
		uint8_t queueid, uint16_t port_in)
{
	struct rx_queue *rxq;
	uint32_t i, len, next_hop;
	uint8_t ipv6;
	uint16_t port_out;
	int32_t len2;

	ipv6 = 0;
	rxq = &qconf->rx_queue_list[queueid];

	/* by default, send everything back to the source port */
	port_out = port_in;

	/* Remove the Ethernet header from the input packet */
	rte_pktmbuf_adj(m, (uint16_t)sizeof(struct rte_ether_hdr));

	/* Build transmission burst */
	len = qconf->tx_mbufs[port_out].len;

	/* if this is an IPv4 packet */
	if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
		struct rte_ipv4_hdr *ip_hdr;
		uint32_t ip_dst;
		/* Read the lookup key (i.e. ip_dst) from the input packet */
		ip_hdr = rte_pktmbuf_mtod(m, struct rte_ipv4_hdr *);
		ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr);

		/* Find destination port */
		if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop) == 0 &&
				(enabled_port_mask & 1 << next_hop) != 0) {
			port_out = next_hop;

			/* Build transmission burst for new port */
			len = qconf->tx_mbufs[port_out].len;
		}

		/* if we don't need to do any fragmentation */
		if (likely(IPV4_MTU_DEFAULT >= m->pkt_len)) {
			qconf->tx_mbufs[port_out].m_table[len] = m;
			len2 = 1;
		} else {
			len2 = rte_ipv4_fragment_packet(m,
				&qconf->tx_mbufs[port_out].m_table[len],
				(uint16_t)(MBUF_TABLE_SIZE - len),
				IPV4_MTU_DEFAULT,
				rxq->direct_pool, rxq->indirect_pool);

			/* Free input packet */
			rte_pktmbuf_free(m);

			/* If we fail to fragment the packet */
			if (unlikely(len2 < 0))
				return;
		}
	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
		/* if this is an IPv6 packet */
		struct rte_ipv6_hdr *ip_hdr;

		ipv6 = 1;

		/* Read the lookup key (i.e. ip_dst) from the input packet */
		ip_hdr = rte_pktmbuf_mtod(m, struct rte_ipv6_hdr *);

		/* Find destination port */
		if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr,
						&next_hop) == 0 &&
				(enabled_port_mask & 1 << next_hop) != 0) {
			port_out = next_hop;

			/* Build transmission burst for new port */
			len = qconf->tx_mbufs[port_out].len;
		}

		/* if we don't need to do any fragmentation */
		if (likely(IPV6_MTU_DEFAULT >= m->pkt_len)) {
			qconf->tx_mbufs[port_out].m_table[len] = m;
			len2 = 1;
		} else {
			len2 = rte_ipv6_fragment_packet(m,
				&qconf->tx_mbufs[port_out].m_table[len],
				(uint16_t)(MBUF_TABLE_SIZE - len),
				IPV6_MTU_DEFAULT,
				rxq->direct_pool, rxq->indirect_pool);

			/* Free input packet */
			rte_pktmbuf_free(m);

			/* If we fail to fragment the packet */
			if (unlikely(len2 < 0))
				return;
		}
	}
	/* else, just forward the packet */
	else {
		qconf->tx_mbufs[port_out].m_table[len] = m;
		len2 = 1;
	}

	for (i = len; i < len + len2; i++) {
		void *d_addr_bytes;

		m = qconf->tx_mbufs[port_out].m_table[i];
		struct rte_ether_hdr *eth_hdr = (struct rte_ether_hdr *)
			rte_pktmbuf_prepend(m,
				(uint16_t)sizeof(struct rte_ether_hdr));
		if (eth_hdr == NULL) {
			rte_panic("No headroom in mbuf.\n");
		}

		m->l2_len = sizeof(struct rte_ether_hdr);

		/* dst addr: 02:00:00:00:00:xx, where xx is the output port */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) = 0x000000000002 +
			((uint64_t)port_out << 40);

		/* src addr */
		rte_ether_addr_copy(&ports_eth_addr[port_out],
				&eth_hdr->s_addr);
		if (ipv6)
			eth_hdr->ether_type =
				rte_be_to_cpu_16(RTE_ETHER_TYPE_IPV6);
		else
			eth_hdr->ether_type =
				rte_be_to_cpu_16(RTE_ETHER_TYPE_IPV4);
	}

	len += len2;

	if (likely(len < MAX_PKT_BURST)) {
		qconf->tx_mbufs[port_out].len = (uint16_t)len;
		return;
	}

	/* Transmit packets */
	send_burst(qconf, (uint16_t)len, port_out);
	qconf->tx_mbufs[port_out].len = 0;
}

/* main processing loop */
static int
main_loop(__attribute__((unused)) void *dummy)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	unsigned lcore_id;
	uint64_t prev_tsc, diff_tsc, cur_tsc;
	int i, j, nb_rx;
	uint16_t portid;
	struct lcore_queue_conf *qconf;
	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
		US_PER_S * BURST_TX_DRAIN_US;

	prev_tsc = 0;

	lcore_id = rte_lcore_id();
	qconf = &lcore_queue_conf[lcore_id];

	if (qconf->n_rx_queue == 0) {
		RTE_LOG(INFO, IP_FRAG, "lcore %u has nothing to do\n", lcore_id);
		return 0;
	}

	RTE_LOG(INFO, IP_FRAG, "entering main loop on lcore %u\n", lcore_id);

	for (i = 0; i < qconf->n_rx_queue; i++) {

		portid = qconf->rx_queue_list[i].portid;
		RTE_LOG(INFO, IP_FRAG, " -- lcoreid=%u portid=%d\n", lcore_id,
				portid);
	}

	while (1) {

		cur_tsc = rte_rdtsc();

		/*
		 * TX burst queue drain
		 */
		diff_tsc = cur_tsc - prev_tsc;
		if (unlikely(diff_tsc > drain_tsc)) {

			/*
			 * This could be optimized (use queueid instead of
			 * portid), but it is not called so often
			 */
			for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
				if (qconf->tx_mbufs[portid].len == 0)
					continue;
				send_burst(&lcore_queue_conf[lcore_id],
					   qconf->tx_mbufs[portid].len,
					   portid);
				qconf->tx_mbufs[portid].len = 0;
			}

			prev_tsc = cur_tsc;
		}
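
		/*
		 * Poll each RX queue owned by this lcore.  Packets are
		 * prefetched PREFETCH_OFFSET ahead of the one currently
		 * being forwarded to hide mbuf/header access latency.
		 */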
		/*
		 * Read packets from RX queues
		 */
		for (i = 0; i < qconf->n_rx_queue; i++) {

			portid = qconf->rx_queue_list[i].portid;
			nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst,
						 MAX_PKT_BURST);

			/* Prefetch first packets */
			for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
				rte_prefetch0(rte_pktmbuf_mtod(
						pkts_burst[j], void *));
			}

			/* Prefetch and forward already prefetched packets */
			for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
				rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
						j + PREFETCH_OFFSET], void *));
				l3fwd_simple_forward(pkts_burst[j], qconf, i, portid);
			}

			/* Forward remaining prefetched packets */
			for (; j < nb_rx; j++) {
				l3fwd_simple_forward(pkts_burst[j], qconf, i, portid);
			}
		}
	}
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n"
	       " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
	       " -q NQ: number of RX queues (=ports) per lcore (default is 1)\n",
	       prgname);
}

static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}

static int
parse_nqueue(const char *q_arg)
{
	char *end = NULL;
	unsigned long n;

	/* parse decimal string */
	n = strtoul(q_arg, &end, 10);
	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;
	if (n == 0)
		return -1;
	if (n >= MAX_RX_QUEUE_PER_LCORE)
		return -1;

	return n;
}

/* Parse the argument given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
	int opt, ret;
	char **argvopt;
	int option_index;
	char *prgname = argv[0];
	static struct option lgopts[] = {
		{NULL, 0, 0, 0}
	};

	argvopt = argv;

	while ((opt = getopt_long(argc, argvopt, "p:q:",
				lgopts, &option_index)) != EOF) {

		switch (opt) {
		/* portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask < 0) {
				printf("invalid portmask\n");
				print_usage(prgname);
				return -1;
			}
			break;

		/* nqueue */
		case 'q':
			rx_queue_per_lcore = parse_nqueue(optarg);
			if (rx_queue_per_lcore < 0) {
				printf("invalid queue number\n");
				print_usage(prgname);
				return -1;
			}
			break;

		/* long options */
		case 0:
			print_usage(prgname);
			return -1;

		default:
			print_usage(prgname);
			return -1;
		}
	}

	if (enabled_port_mask == 0) {
		printf("portmask not specified\n");
		print_usage(prgname);
		return -1;
	}

	if (optind >= 0)
		argv[optind-1] = prgname;

	ret = optind-1;
	optind = 1; /* reset getopt lib */
	return ret;
}

static void
print_ethaddr(const char *name, struct rte_ether_addr *eth_addr)
{
	char buf[RTE_ETHER_ADDR_FMT_SIZE];
	rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr);
	printf("%s%s", name, buf);
}

/* Check the link status of all ports in up to 9s, and print them finally */
static void
check_all_ports_link_status(uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
	uint16_t portid;
	uint8_t count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		RTE_ETH_FOREACH_DEV(portid) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			rte_eth_link_get_nowait(portid, &link);
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					printf(
					"Port %d Link Up. Speed %u Mbps - %s\n",
					portid, link.link_speed,
					(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
					("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n", portid);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == ETH_LINK_DOWN) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("\ndone\n");
		}
	}
}

/* Check L3 packet type detection capability of the NIC port */
static int
check_ptype(int portid)
{
	int i, ret;
	int ptype_l3_ipv4 = 0, ptype_l3_ipv6 = 0;
	uint32_t ptype_mask = RTE_PTYPE_L3_MASK;

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
	if (ret <= 0)
		return 0;

	uint32_t ptypes[ret];

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
	for (i = 0; i < ret; ++i) {
		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
			ptype_l3_ipv4 = 1;
		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
			ptype_l3_ipv6 = 1;
	}

	if (ptype_l3_ipv4 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);

	if (ptype_l3_ipv6 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);

	if (ptype_l3_ipv4 && ptype_l3_ipv6)
		return 1;

	return 0;
}

/* Parse packet type of a packet by SW */
static inline void
parse_ptype(struct rte_mbuf *m)
{
	struct rte_ether_hdr *eth_hdr;
	uint32_t packet_type = RTE_PTYPE_UNKNOWN;
	uint16_t ether_type;

	eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
	ether_type = eth_hdr->ether_type;
	if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
		packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
	else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6))
		packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;

	m->packet_type = packet_type;
}

/* callback function to detect packet type for a queue of a port */
static uint16_t
cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused,
		struct rte_mbuf *pkts[], uint16_t nb_pkts,
		uint16_t max_pkts __rte_unused,
		void *user_param __rte_unused)
{
	uint16_t i;

	for (i = 0; i < nb_pkts; ++i)
		parse_ptype(pkts[i]);

	return nb_pkts;
}
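
/*
 * Populate the per-socket LPM/LPM6 tables (created in init_mem()) from
 * the static IPv4/IPv6 route arrays defined above.
 */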
static int
init_routing_table(void)
{
	struct rte_lpm *lpm;
	struct rte_lpm6 *lpm6;
	int socket, ret;
	unsigned i;

	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
		if (socket_lpm[socket]) {
			lpm = socket_lpm[socket];
			/* populate the LPM table */
			for (i = 0; i < RTE_DIM(l3fwd_ipv4_route_array); i++) {
				ret = rte_lpm_add(lpm,
					l3fwd_ipv4_route_array[i].ip,
					l3fwd_ipv4_route_array[i].depth,
					l3fwd_ipv4_route_array[i].if_out);

				if (ret < 0) {
					RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd "
						"LPM table\n", i);
					return -1;
				}

				RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv4_BYTES_FMT
						"/%d (port %d)\n",
					socket,
					IPv4_BYTES(l3fwd_ipv4_route_array[i].ip),
					l3fwd_ipv4_route_array[i].depth,
					l3fwd_ipv4_route_array[i].if_out);
			}
		}

		if (socket_lpm6[socket]) {
			lpm6 = socket_lpm6[socket];
			/* populate the LPM6 table */
			for (i = 0; i < RTE_DIM(l3fwd_ipv6_route_array); i++) {
				ret = rte_lpm6_add(lpm6,
					l3fwd_ipv6_route_array[i].ip,
					l3fwd_ipv6_route_array[i].depth,
					l3fwd_ipv6_route_array[i].if_out);

				if (ret < 0) {
					RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd "
						"LPM6 table\n", i);
					return -1;
				}

				RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv6_BYTES_FMT
						"/%d (port %d)\n",
					socket,
					IPv6_BYTES(l3fwd_ipv6_route_array[i].ip),
					l3fwd_ipv6_route_array[i].depth,
					l3fwd_ipv6_route_array[i].if_out);
			}
		}
	}
	return 0;
}

static int
init_mem(void)
{
	char buf[PATH_MAX];
	struct rte_mempool *mp;
	struct rte_lpm *lpm;
	struct rte_lpm6 *lpm6;
	struct rte_lpm_config lpm_config;
	int socket;
	unsigned lcore_id;

	/* traverse through lcores and initialize structures on each socket */

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {

		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		socket = rte_lcore_to_socket_id(lcore_id);

		if (socket == SOCKET_ID_ANY)
			socket = 0;

		if (socket_direct_pool[socket] == NULL) {
			RTE_LOG(INFO, IP_FRAG, "Creating direct mempool on socket %i\n",
					socket);
			snprintf(buf, sizeof(buf), "pool_direct_%i", socket);

			mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32,
				0, RTE_MBUF_DEFAULT_BUF_SIZE, socket);
			if (mp == NULL) {
				RTE_LOG(ERR, IP_FRAG, "Cannot create direct mempool\n");
				return -1;
			}
			socket_direct_pool[socket] = mp;
		}

		if (socket_indirect_pool[socket] == NULL) {
			RTE_LOG(INFO, IP_FRAG, "Creating indirect mempool on socket %i\n",
					socket);
			snprintf(buf, sizeof(buf), "pool_indirect_%i", socket);

			/* indirect mbufs carry no data of their own, so a
			 * zero-size data room is enough */
			mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32, 0, 0,
				socket);
			if (mp == NULL) {
				RTE_LOG(ERR, IP_FRAG, "Cannot create indirect mempool\n");
				return -1;
			}
			socket_indirect_pool[socket] = mp;
		}

		if (socket_lpm[socket] == NULL) {
			RTE_LOG(INFO, IP_FRAG, "Creating LPM table on socket %i\n", socket);
			snprintf(buf, sizeof(buf), "IP_FRAG_LPM_%i", socket);

			lpm_config.max_rules = LPM_MAX_RULES;
			lpm_config.number_tbl8s = 256;
			lpm_config.flags = 0;

			lpm = rte_lpm_create(buf, socket, &lpm_config);
			if (lpm == NULL) {
				RTE_LOG(ERR, IP_FRAG, "Cannot create LPM table\n");
				return -1;
			}
			socket_lpm[socket] = lpm;
		}

		if (socket_lpm6[socket] == NULL) {
			RTE_LOG(INFO, IP_FRAG, "Creating LPM6 table on socket %i\n", socket);
			snprintf(buf, sizeof(buf), "IP_FRAG_LPM6_%i", socket);

			lpm6 = rte_lpm6_create(buf, socket, &lpm6_config);
			if (lpm6 == NULL) {
				RTE_LOG(ERR, IP_FRAG, "Cannot create LPM6 table\n");
				return -1;
			}
			socket_lpm6[socket] = lpm6;
		}
	}

	return 0;
}
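
/*
 * Application entry point: initialize the EAL, parse the portmask and
 * queue options, create the per-socket mempools and LPM/LPM6 tables,
 * configure and start each enabled port, install the routes, and then
 * launch main_loop() on every lcore.
 */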
int
main(int argc, char **argv)
{
	struct lcore_queue_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	struct rx_queue *rxq;
	int socket, ret;
	uint16_t nb_ports;
	uint16_t queueid = 0;
	unsigned lcore_id = 0, rx_lcore_id = 0;
	uint32_t n_tx_queue, nb_lcores;
	uint16_t portid;

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "rte_eal_init failed");
	argc -= ret;
	argv += ret;

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid arguments");

	nb_ports = rte_eth_dev_count_avail();
	if (nb_ports == 0)
		rte_exit(EXIT_FAILURE, "No ports found!\n");

	nb_lcores = rte_lcore_count();

	/* initialize structures (mempools, lpm etc.) */
	if (init_mem() < 0)
		rte_panic("Cannot initialize memory structures!\n");

	/* check if portmask has non-existent ports */
	if (enabled_port_mask & ~(RTE_LEN2MASK(nb_ports, unsigned)))
		rte_exit(EXIT_FAILURE, "Non-existent ports in portmask!\n");

	/* initialize all ports */
	RTE_ETH_FOREACH_DEV(portid) {
		struct rte_eth_conf local_port_conf = port_conf;
		struct rte_eth_rxconf rxq_conf;

		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("Skipping disabled port %d\n", portid);
			continue;
		}

		qconf = &lcore_queue_conf[rx_lcore_id];

		/* limit the frame size to the maximum supported by NIC */
		rte_eth_dev_info_get(portid, &dev_info);
		local_port_conf.rxmode.max_rx_pkt_len = RTE_MIN(
		    dev_info.max_rx_pktlen,
		    local_port_conf.rxmode.max_rx_pkt_len);

		/* get the lcore_id for this port */
		while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
		       qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) {

			rx_lcore_id++;
			if (rx_lcore_id >= RTE_MAX_LCORE)
				rte_exit(EXIT_FAILURE, "Not enough cores\n");

			qconf = &lcore_queue_conf[rx_lcore_id];
		}

		socket = (int) rte_lcore_to_socket_id(rx_lcore_id);
		if (socket == SOCKET_ID_ANY)
			socket = 0;

		rxq = &qconf->rx_queue_list[qconf->n_rx_queue];
		rxq->portid = portid;
		rxq->direct_pool = socket_direct_pool[socket];
		rxq->indirect_pool = socket_indirect_pool[socket];
		rxq->lpm = socket_lpm[socket];
		rxq->lpm6 = socket_lpm6[socket];
		qconf->n_rx_queue++;

		/* init port */
		printf("Initializing port %d on lcore %u...", portid,
		       rx_lcore_id);
		fflush(stdout);

		n_tx_queue = nb_lcores;
		if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
			n_tx_queue = MAX_TX_QUEUE_PER_PORT;
		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
			local_port_conf.txmode.offloads |=
				DEV_TX_OFFLOAD_MBUF_FAST_FREE;
		ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue,
					    &local_port_conf);
		if (ret < 0) {
			printf("\n");
			rte_exit(EXIT_FAILURE, "Cannot configure device: "
				"err=%d, port=%d\n",
				ret, portid);
		}

		/* set the mtu to the maximum received packet size */
		ret = rte_eth_dev_set_mtu(portid,
			local_port_conf.rxmode.max_rx_pkt_len - MTU_OVERHEAD);
		if (ret < 0) {
			printf("\n");
			rte_exit(EXIT_FAILURE, "Set MTU failed: "
				"err=%d, port=%d\n",
				ret, portid);
		}

		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						       &nb_txd);
		if (ret < 0) {
			printf("\n");
			rte_exit(EXIT_FAILURE, "Cannot adjust number of "
				"descriptors: err=%d, port=%d\n", ret, portid);
		}
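
		/*
		 * Each port gets a single RX queue (polled by exactly one
		 * lcore) and one TX queue per enabled lcore, as requested
		 * via rte_eth_dev_configure() above.
		 */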

		/* init one RX queue */
		rxq_conf = dev_info.default_rxconf;
		rxq_conf.offloads = local_port_conf.rxmode.offloads;
		ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
					     socket, &rxq_conf,
					     socket_direct_pool[socket]);
		if (ret < 0) {
			printf("\n");
			rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
				"err=%d, port=%d\n",
				ret, portid);
		}

		rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf("\n");

		/* init one TX queue per couple (lcore,port) */
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			if (queueid >= rte_eth_devices[portid].data->nb_tx_queues)
				break;

			socket = (int) rte_lcore_to_socket_id(lcore_id);
			printf("txq=%u,%d ", lcore_id, queueid);
			fflush(stdout);

			txconf = &dev_info.default_txconf;
			txconf->offloads = local_port_conf.txmode.offloads;
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
						     socket, txconf);
			if (ret < 0) {
				printf("\n");
				rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: "
					"err=%d, port=%d\n", ret, portid);
			}

			qconf = &lcore_queue_conf[lcore_id];
			qconf->tx_queue_id[portid] = queueid;
			queueid++;
		}

		printf("\n");
	}

	printf("\n");

	/* start ports */
	RTE_ETH_FOREACH_DEV(portid) {
		if ((enabled_port_mask & (1 << portid)) == 0) {
			continue;
		}
		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
				ret, portid);

		rte_eth_promiscuous_enable(portid);

		if (check_ptype(portid) == 0) {
			rte_eth_add_rx_callback(portid, 0, cb_parse_ptype, NULL);
			printf("Add Rx callback function to detect L3 packet type by SW:"
				" port = %d\n", portid);
		}
	}

	if (init_routing_table() < 0)
		rte_exit(EXIT_FAILURE, "Cannot init routing table\n");

	check_all_ports_link_status(enabled_port_mask);

	/* launch main_loop() on every lcore */
	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}