/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/param.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_lpm.h>
#include <rte_lpm6.h>
#include <rte_ip.h>
#include <rte_string_fns.h>

#include <rte_ip_frag.h>

#define RTE_LOGTYPE_IP_FRAG RTE_LOGTYPE_USER1

/* allow max jumbo frame 9.5 KB */
#define JUMBO_FRAME_MAX_SIZE	0x2600

#define ROUNDUP_DIV(a, b)	(((a) + (b) - 1) / (b))

/*
 * Default byte size for the IPv4/IPv6 Maximum Transmission Unit (MTU).
 * This value includes the size of the IP header.
 */
#define IPV4_MTU_DEFAULT	ETHER_MTU
#define IPV6_MTU_DEFAULT	ETHER_MTU

/*
 * Default payload in bytes for the IPv4/IPv6 packet.
 */
#define IPV4_DEFAULT_PAYLOAD	(IPV4_MTU_DEFAULT - sizeof(struct ipv4_hdr))
#define IPV6_DEFAULT_PAYLOAD	(IPV6_MTU_DEFAULT - sizeof(struct ipv6_hdr))

/*
 * Max number of fragments per packet expected - defined by config file.
 */
#define MAX_PACKET_FRAG RTE_LIBRTE_IP_FRAG_MAX_FRAG

#define NB_MBUF   8192

#define MAX_PKT_BURST	32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET	3

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

#ifndef IPv4_BYTES
#define IPv4_BYTES_FMT "%" PRIu8 ".%" PRIu8 ".%" PRIu8 ".%" PRIu8
#define IPv4_BYTES(addr) \
	(uint8_t) (((addr) >> 24) & 0xFF),\
	(uint8_t) (((addr) >> 16) & 0xFF),\
	(uint8_t) (((addr) >> 8) & 0xFF),\
	(uint8_t) ((addr) & 0xFF)
#endif

#ifndef IPv6_BYTES
#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
		       "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
#define IPv6_BYTES(addr) \
	addr[0], addr[1], addr[2], addr[3], \
	addr[4], addr[5], addr[6], addr[7], \
	addr[8], addr[9], addr[10], addr[11],\
	addr[12], addr[13], addr[14], addr[15]
#endif

#define IPV6_ADDR_LEN 16

/* mask of enabled ports */
static int enabled_port_mask = 0;

static int rx_queue_per_lcore = 1;

#define MBUF_TABLE_SIZE  (2 * MAX(MAX_PKT_BURST, MAX_PACKET_FRAG))

struct mbuf_table {
	uint16_t len;
	struct rte_mbuf *m_table[MBUF_TABLE_SIZE];
};

struct rx_queue {
	struct rte_mempool *direct_pool;
	struct rte_mempool *indirect_pool;
	struct rte_lpm *lpm;
	struct rte_lpm6 *lpm6;
	uint16_t portid;
};

#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT 16
struct lcore_queue_conf {
	uint16_t n_rx_queue;
	uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
	struct rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
	struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
} __rte_cache_aligned;
struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];

static struct rte_eth_conf port_conf = {
	.rxmode = {
		.max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
		.split_hdr_size = 0,
		.header_split   = 0, /**< Header Split disabled */
		.hw_ip_checksum = 1, /**< IP checksum offload enabled */
		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
		.jumbo_frame    = 1, /**< Jumbo Frame Support enabled */
		.hw_strip_crc   = 1, /**< CRC stripped by hardware */
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
};

/*
 * IPv4 forwarding table
 */
struct l3fwd_ipv4_route {
	uint32_t ip;
	uint8_t  depth;
	uint8_t  if_out;
};

struct l3fwd_ipv4_route l3fwd_ipv4_route_array[] = {
	{IPv4(100,10,0,0), 16, 0},
	{IPv4(100,20,0,0), 16, 1},
	{IPv4(100,30,0,0), 16, 2},
	{IPv4(100,40,0,0), 16, 3},
	{IPv4(100,50,0,0), 16, 4},
	{IPv4(100,60,0,0), 16, 5},
	{IPv4(100,70,0,0), 16, 6},
	{IPv4(100,80,0,0), 16, 7},
};

/*
 * IPv6 forwarding table
 */

struct l3fwd_ipv6_route {
	uint8_t ip[IPV6_ADDR_LEN];
	uint8_t depth;
	uint8_t if_out;
};

static struct l3fwd_ipv6_route l3fwd_ipv6_route_array[] = {
	{{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0},
	{{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1},
	{{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2},
	{{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3},
	{{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4},
	{{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5},
	{{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6},
	{{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7},
};

#define LPM_MAX_RULES		1024
#define LPM6_MAX_RULES		1024
#define LPM6_NUMBER_TBL8S	(1 << 16)

struct rte_lpm6_config lpm6_config = {
	.max_rules = LPM6_MAX_RULES,
	.number_tbl8s = LPM6_NUMBER_TBL8S,
	.flags = 0
};

static struct rte_mempool *socket_direct_pool[RTE_MAX_NUMA_NODES];
static struct rte_mempool *socket_indirect_pool[RTE_MAX_NUMA_NODES];
static struct rte_lpm *socket_lpm[RTE_MAX_NUMA_NODES];
static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES];

/* Send burst of packets on an output interface */
static inline int
send_burst(struct lcore_queue_conf *qconf, uint16_t n, uint16_t port)
{
	struct rte_mbuf **m_table;
	int ret;
	uint16_t queueid;

	queueid = qconf->tx_queue_id[port];
	m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;

	ret = rte_eth_tx_burst(port, queueid, m_table, n);
	if (unlikely(ret < n)) {
		/* Free the mbufs that could not be transmitted */
		do {
			rte_pktmbuf_free(m_table[ret]);
		} while (++ret < n);
	}

	return 0;
}

static inline void
l3fwd_simple_forward(struct rte_mbuf *m, struct lcore_queue_conf *qconf,
		uint8_t queueid, uint16_t port_in)
{
	struct rx_queue *rxq;
	uint32_t i, len, next_hop;
	uint8_t ipv6;
	uint16_t port_out;
	int32_t len2;

	ipv6 = 0;
	rxq = &qconf->rx_queue_list[queueid];

	/* by default, send everything back to the source port */
	port_out = port_in;

	/* Remove the Ethernet header and trailer from the input packet */
	rte_pktmbuf_adj(m, (uint16_t)sizeof(struct ether_hdr));

	/* Build transmission burst */
	len = qconf->tx_mbufs[port_out].len;

	/* if this is an IPv4 packet */
	if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
		struct ipv4_hdr *ip_hdr;
		uint32_t ip_dst;
		/* Read the lookup key (i.e. ip_dst) from the input packet */
		ip_hdr = rte_pktmbuf_mtod(m, struct ipv4_hdr *);
		ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr);

		/* Find destination port */
		if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop) == 0 &&
				(enabled_port_mask & 1 << next_hop) != 0) {
			port_out = next_hop;

			/* Build transmission burst for new port */
			len = qconf->tx_mbufs[port_out].len;
		}

		/* if we don't need to do any fragmentation */
		if (likely(IPV4_MTU_DEFAULT >= m->pkt_len)) {
			qconf->tx_mbufs[port_out].m_table[len] = m;
			len2 = 1;
		} else {
			/*
			 * Fragment the packet: new fragment headers are
			 * allocated from the direct pool, while the fragment
			 * payloads reference the original packet's data
			 * through indirect mbufs from the indirect pool.
			 */
			len2 = rte_ipv4_fragment_packet(m,
				&qconf->tx_mbufs[port_out].m_table[len],
				(uint16_t)(MBUF_TABLE_SIZE - len),
				IPV4_MTU_DEFAULT,
				rxq->direct_pool, rxq->indirect_pool);

			/* Free input packet */
			rte_pktmbuf_free(m);

			/* If we fail to fragment the packet */
			if (unlikely(len2 < 0))
				return;
		}
	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
		/* if this is an IPv6 packet */
		struct ipv6_hdr *ip_hdr;

		ipv6 = 1;

		/* Read the lookup key (i.e. ip_dst) from the input packet */
		ip_hdr = rte_pktmbuf_mtod(m, struct ipv6_hdr *);

		/* Find destination port */
		if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr,
					&next_hop) == 0 &&
				(enabled_port_mask & 1 << next_hop) != 0) {
			port_out = next_hop;

			/* Build transmission burst for new port */
			len = qconf->tx_mbufs[port_out].len;
		}

		/* if we don't need to do any fragmentation */
		if (likely(IPV6_MTU_DEFAULT >= m->pkt_len)) {
			qconf->tx_mbufs[port_out].m_table[len] = m;
			len2 = 1;
		} else {
			len2 = rte_ipv6_fragment_packet(m,
				&qconf->tx_mbufs[port_out].m_table[len],
				(uint16_t)(MBUF_TABLE_SIZE - len),
				IPV6_MTU_DEFAULT,
				rxq->direct_pool, rxq->indirect_pool);

			/* Free input packet */
			rte_pktmbuf_free(m);

			/* If we fail to fragment the packet */
			if (unlikely(len2 < 0))
				return;
		}
	}
	/* else, just forward the packet */
	else {
		qconf->tx_mbufs[port_out].m_table[len] = m;
		len2 = 1;
	}

	/* Prepend a new Ethernet header on each output packet/fragment */
	for (i = len; i < len + len2; i++) {
		void *d_addr_bytes;

		m = qconf->tx_mbufs[port_out].m_table[i];
		struct ether_hdr *eth_hdr = (struct ether_hdr *)
			rte_pktmbuf_prepend(m, (uint16_t)sizeof(struct ether_hdr));
		if (eth_hdr == NULL) {
			rte_panic("No headroom in mbuf.\n");
		}

		m->l2_len = sizeof(struct ether_hdr);

		/* dst addr: 02:00:00:00:00:xx, where xx is the output port id */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) = 0x000000000002 +
			((uint64_t)port_out << 40);

		/* src addr */
		ether_addr_copy(&ports_eth_addr[port_out], &eth_hdr->s_addr);
		if (ipv6)
			eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
		else
			eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
	}

	len += len2;

	if (likely(len < MAX_PKT_BURST)) {
		qconf->tx_mbufs[port_out].len = (uint16_t)len;
		return;
	}

	/* Transmit packets */
	send_burst(qconf, (uint16_t)len, port_out);
	qconf->tx_mbufs[port_out].len = 0;
}

/* main processing loop */
static int
main_loop(__attribute__((unused)) void *dummy)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	unsigned lcore_id;
	uint64_t prev_tsc, diff_tsc, cur_tsc;
	int i, j, nb_rx;
	uint16_t portid;
	struct lcore_queue_conf *qconf;
	/* Convert the drain period from microseconds to TSC cycles */
	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
		US_PER_S * BURST_TX_DRAIN_US;

	prev_tsc = 0;

	lcore_id = rte_lcore_id();
	qconf = &lcore_queue_conf[lcore_id];

	if (qconf->n_rx_queue == 0) {
		RTE_LOG(INFO, IP_FRAG, "lcore %u has nothing to do\n", lcore_id);
		return 0;
	}

	RTE_LOG(INFO, IP_FRAG, "entering main loop on lcore %u\n", lcore_id);

	for (i = 0; i < qconf->n_rx_queue; i++) {

		portid = qconf->rx_queue_list[i].portid;
		RTE_LOG(INFO, IP_FRAG, " -- lcoreid=%u portid=%d\n", lcore_id,
				portid);
	}

	while (1) {

		cur_tsc = rte_rdtsc();

		/*
		 * TX burst queue drain
		 */
		diff_tsc = cur_tsc - prev_tsc;
		if (unlikely(diff_tsc > drain_tsc)) {

			/*
			 * This could be optimized (use queueid instead of
			 * portid), but it is not called so often
			 */
			for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
				if (qconf->tx_mbufs[portid].len == 0)
					continue;
				send_burst(&lcore_queue_conf[lcore_id],
					   qconf->tx_mbufs[portid].len,
					   portid);
				qconf->tx_mbufs[portid].len = 0;
			}

			prev_tsc = cur_tsc;
		}
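
		/*
		 * The RX loop below prefetches PREFETCH_OFFSET packets ahead
		 * of the one currently being processed, so that the header
		 * reads done in l3fwd_simple_forward() hit cache lines that
		 * have already been fetched.
		 */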
		/*
		 * Read packets from RX queues
		 */
		for (i = 0; i < qconf->n_rx_queue; i++) {

			portid = qconf->rx_queue_list[i].portid;
			nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst,
						 MAX_PKT_BURST);

			/* Prefetch first packets */
			for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
				rte_prefetch0(rte_pktmbuf_mtod(
						pkts_burst[j], void *));
			}

			/* Prefetch and forward already prefetched packets */
			for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
				rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
						j + PREFETCH_OFFSET], void *));
				l3fwd_simple_forward(pkts_burst[j], qconf, i, portid);
			}

			/* Forward remaining prefetched packets */
			for (; j < nb_rx; j++) {
				l3fwd_simple_forward(pkts_burst[j], qconf, i, portid);
			}
		}
	}
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n"
	       "  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
	       "  -q NQ: number of RX queues (= ports) per lcore (default is 1)\n",
	       prgname);
}

static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}

static int
parse_nqueue(const char *q_arg)
{
	char *end = NULL;
	unsigned long n;

	/* parse decimal string */
	n = strtoul(q_arg, &end, 10);
	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;
	if (n == 0)
		return -1;
	if (n >= MAX_RX_QUEUE_PER_LCORE)
		return -1;

	return n;
}

/* Parse the argument given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
	int opt, ret;
	char **argvopt;
	int option_index;
	char *prgname = argv[0];
	static struct option lgopts[] = {
		{NULL, 0, 0, 0}
	};

	argvopt = argv;

	while ((opt = getopt_long(argc, argvopt, "p:q:",
				  lgopts, &option_index)) != EOF) {

		switch (opt) {
		/* portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask < 0) {
				printf("invalid portmask\n");
				print_usage(prgname);
				return -1;
			}
			break;

		/* nqueue */
		case 'q':
			rx_queue_per_lcore = parse_nqueue(optarg);
			if (rx_queue_per_lcore < 0) {
				printf("invalid queue number\n");
				print_usage(prgname);
				return -1;
			}
			break;

		/* long options */
		case 0:
			print_usage(prgname);
			return -1;

		default:
			print_usage(prgname);
			return -1;
		}
	}

	if (enabled_port_mask == 0) {
		printf("portmask not specified\n");
		print_usage(prgname);
		return -1;
	}

	if (optind >= 0)
		argv[optind-1] = prgname;

	ret = optind-1;
	optind = 1; /* reset getopt lib */
	return ret;
}

static void
print_ethaddr(const char *name, struct ether_addr *eth_addr)
{
	char buf[ETHER_ADDR_FMT_SIZE];
	ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
	printf("%s%s", name, buf);
}

/* Check the link status of all ports in up to 9s, and print the result */
static void
check_all_ports_link_status(uint16_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
	uint16_t portid;
	uint8_t count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		for (portid = 0; portid < port_num; portid++) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			rte_eth_link_get_nowait(portid, &link);
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					printf("Port %d Link Up. Speed %u Mbps - %s\n",
						portid, link.link_speed,
						(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
						("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n", portid);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == ETH_LINK_DOWN) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("\ndone\n");
		}
	}
}

/* Check L3 packet type detection capability of the NIC port */
static int
check_ptype(int portid)
{
	int i, ret;
	int ptype_l3_ipv4 = 0, ptype_l3_ipv6 = 0;
	uint32_t ptype_mask = RTE_PTYPE_L3_MASK;

	/* First query how many ptypes the port reports, then fetch them */
	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
	if (ret <= 0)
		return 0;

	uint32_t ptypes[ret];

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
	for (i = 0; i < ret; ++i) {
		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
			ptype_l3_ipv4 = 1;
		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
			ptype_l3_ipv6 = 1;
	}

	if (ptype_l3_ipv4 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);

	if (ptype_l3_ipv6 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);

	if (ptype_l3_ipv4 && ptype_l3_ipv6)
		return 1;

	return 0;
}

/* Parse packet type of a packet by SW */
static inline void
parse_ptype(struct rte_mbuf *m)
{
	struct ether_hdr *eth_hdr;
	uint32_t packet_type = RTE_PTYPE_UNKNOWN;
	uint16_t ether_type;

	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
	ether_type = eth_hdr->ether_type;
	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
		packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
		packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;

	m->packet_type = packet_type;
}

/* callback function to detect packet type for a queue of a port */
static uint16_t
cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused,
	       struct rte_mbuf *pkts[], uint16_t nb_pkts,
	       uint16_t max_pkts __rte_unused,
	       void *user_param __rte_unused)
{
	uint16_t i;

	for (i = 0; i < nb_pkts; ++i)
		parse_ptype(pkts[i]);

	return nb_pkts;
}

static int
init_routing_table(void)
{
	struct rte_lpm *lpm;
	struct rte_lpm6 *lpm6;
	int socket, ret;
	unsigned i;

	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
		if (socket_lpm[socket]) {
			lpm = socket_lpm[socket];
			/* populate the LPM table */
			for (i = 0; i < RTE_DIM(l3fwd_ipv4_route_array); i++) {
				ret = rte_lpm_add(lpm,
					l3fwd_ipv4_route_array[i].ip,
					l3fwd_ipv4_route_array[i].depth,
					l3fwd_ipv4_route_array[i].if_out);

				if (ret < 0) {
					RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd "
						"LPM table\n", i);
					return -1;
				}

				RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv4_BYTES_FMT
						"/%d (port %d)\n",
					socket,
					IPv4_BYTES(l3fwd_ipv4_route_array[i].ip),
					l3fwd_ipv4_route_array[i].depth,
					l3fwd_ipv4_route_array[i].if_out);
			}
		}

		if (socket_lpm6[socket]) {
			lpm6 = socket_lpm6[socket];
			/* populate the LPM6 table */
			for (i = 0; i < RTE_DIM(l3fwd_ipv6_route_array); i++) {
				ret = rte_lpm6_add(lpm6,
					l3fwd_ipv6_route_array[i].ip,
					l3fwd_ipv6_route_array[i].depth,
					l3fwd_ipv6_route_array[i].if_out);

				if (ret < 0) {
					RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd "
						"LPM6 table\n", i);
					return -1;
				}

				RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv6_BYTES_FMT
						"/%d (port %d)\n",
					socket,
					IPv6_BYTES(l3fwd_ipv6_route_array[i].ip),
					l3fwd_ipv6_route_array[i].depth,
					l3fwd_ipv6_route_array[i].if_out);
			}
		}
	}
	return 0;
}

static int
init_mem(void)
{
	char buf[PATH_MAX];
	struct rte_mempool *mp;
	struct rte_lpm *lpm;
	struct rte_lpm6 *lpm6;
	struct rte_lpm_config lpm_config;
	int socket;
	unsigned lcore_id;

	/* traverse through lcores and initialize structures on each socket */

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {

		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		socket = rte_lcore_to_socket_id(lcore_id);

		if (socket == SOCKET_ID_ANY)
			socket = 0;

		if (socket_direct_pool[socket] == NULL) {
			RTE_LOG(INFO, IP_FRAG, "Creating direct mempool on socket %i\n",
					socket);
			snprintf(buf, sizeof(buf), "pool_direct_%i", socket);

			mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32,
				0, RTE_MBUF_DEFAULT_BUF_SIZE, socket);
			if (mp == NULL) {
				RTE_LOG(ERR, IP_FRAG, "Cannot create direct mempool\n");
				return -1;
			}
			socket_direct_pool[socket] = mp;
		}

		if (socket_indirect_pool[socket] == NULL) {
			RTE_LOG(INFO, IP_FRAG, "Creating indirect mempool on socket %i\n",
					socket);
			snprintf(buf, sizeof(buf), "pool_indirect_%i", socket);

			/* indirect mbufs carry no data of their own, so no data room is needed */
			mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32, 0, 0,
				socket);
			if (mp == NULL) {
				RTE_LOG(ERR, IP_FRAG, "Cannot create indirect mempool\n");
				return -1;
			}
			socket_indirect_pool[socket] = mp;
		}

		if (socket_lpm[socket] == NULL) {
			RTE_LOG(INFO, IP_FRAG, "Creating LPM table on socket %i\n", socket);
			snprintf(buf, sizeof(buf), "IP_FRAG_LPM_%i", socket);

			lpm_config.max_rules = LPM_MAX_RULES;
			lpm_config.number_tbl8s = 256;
			lpm_config.flags = 0;

			lpm = rte_lpm_create(buf, socket, &lpm_config);
			if (lpm == NULL) {
				RTE_LOG(ERR, IP_FRAG, "Cannot create LPM table\n");
				return -1;
			}
			socket_lpm[socket] = lpm;
		}

		if (socket_lpm6[socket] == NULL) {
			RTE_LOG(INFO, IP_FRAG, "Creating LPM6 table on socket %i\n", socket);
			snprintf(buf, sizeof(buf), "IP_FRAG_LPM6_%i", socket);

			lpm6 = rte_lpm6_create(buf, socket, &lpm6_config);
			if (lpm6 == NULL) {
				RTE_LOG(ERR, IP_FRAG, "Cannot create LPM6 table\n");
				return -1;
			}
			socket_lpm6[socket] = lpm6;
		}
	}

	return 0;
}
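
/*
 * main() below performs the application set-up: EAL initialization, command
 * line parsing, per-socket mempool/LPM creation, per-port RX/TX queue
 * configuration, port start-up and, finally, launching main_loop() on every
 * enabled lcore.
 */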
int
main(int argc, char **argv)
{
	struct lcore_queue_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	struct rx_queue *rxq;
	int socket, ret;
	unsigned nb_ports;
	uint16_t queueid = 0;
	unsigned lcore_id = 0, rx_lcore_id = 0;
	uint32_t n_tx_queue, nb_lcores;
	uint16_t portid;

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "rte_eal_init failed");
	argc -= ret;
	argv += ret;

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid arguments");

	nb_ports = rte_eth_dev_count();
	if (nb_ports == 0)
		rte_exit(EXIT_FAILURE, "No ports found!\n");

	nb_lcores = rte_lcore_count();

	/* initialize structures (mempools, lpm etc.) */
	if (init_mem() < 0)
		rte_panic("Cannot initialize memory structures!\n");

	/* check if portmask has non-existent ports */
	if (enabled_port_mask & ~(RTE_LEN2MASK(nb_ports, unsigned)))
		rte_exit(EXIT_FAILURE, "Non-existent ports in portmask!\n");

	/* initialize all ports */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("Skipping disabled port %d\n", portid);
			continue;
		}

		qconf = &lcore_queue_conf[rx_lcore_id];

		/* limit the frame size to the maximum supported by NIC */
		rte_eth_dev_info_get(portid, &dev_info);
		port_conf.rxmode.max_rx_pkt_len = RTE_MIN(
		    dev_info.max_rx_pktlen, port_conf.rxmode.max_rx_pkt_len);

		/* get the lcore_id for this port */
		while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
		       qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) {

			rx_lcore_id++;
			if (rx_lcore_id >= RTE_MAX_LCORE)
				rte_exit(EXIT_FAILURE, "Not enough cores\n");

			qconf = &lcore_queue_conf[rx_lcore_id];
		}

		socket = (int) rte_lcore_to_socket_id(rx_lcore_id);
		if (socket == SOCKET_ID_ANY)
			socket = 0;

		rxq = &qconf->rx_queue_list[qconf->n_rx_queue];
		rxq->portid = portid;
		rxq->direct_pool = socket_direct_pool[socket];
		rxq->indirect_pool = socket_indirect_pool[socket];
		rxq->lpm = socket_lpm[socket];
		rxq->lpm6 = socket_lpm6[socket];
		qconf->n_rx_queue++;

		/* init port */
		printf("Initializing port %d on lcore %u...", portid,
		       rx_lcore_id);
		fflush(stdout);

		n_tx_queue = nb_lcores;
		if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
			n_tx_queue = MAX_TX_QUEUE_PER_PORT;
		ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue,
					    &port_conf);
		if (ret < 0) {
			printf("\n");
			rte_exit(EXIT_FAILURE, "Cannot configure device: "
				"err=%d, port=%d\n",
				ret, portid);
		}

		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						       &nb_txd);
		if (ret < 0) {
			printf("\n");
			rte_exit(EXIT_FAILURE, "Cannot adjust number of "
				"descriptors: err=%d, port=%d\n", ret, portid);
		}

		/* init one RX queue */
		ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
					     socket, NULL,
					     socket_direct_pool[socket]);
		if (ret < 0) {
			printf("\n");
			rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
				"err=%d, port=%d\n",
				ret, portid);
		}

		rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf("\n");

		/* init one TX queue per couple (lcore,port) */
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;
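
			/*
			 * The TX ring for this (lcore, port) pair is
			 * allocated on the lcore's own socket, so each lcore
			 * can transmit to any port without locking.
			 */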
			socket = (int) rte_lcore_to_socket_id(lcore_id);
			printf("txq=%u,%d ", lcore_id, queueid);
			fflush(stdout);

			txconf = &dev_info.default_txconf;
			txconf->txq_flags = 0;
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
						     socket, txconf);
			if (ret < 0) {
				printf("\n");
				rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: "
					"err=%d, port=%d\n", ret, portid);
			}

			qconf = &lcore_queue_conf[lcore_id];
			qconf->tx_queue_id[portid] = queueid;
			queueid++;
		}

		printf("\n");
	}

	printf("\n");

	/* start ports */
	for (portid = 0; portid < nb_ports; portid++) {
		if ((enabled_port_mask & (1 << portid)) == 0) {
			continue;
		}
		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
				ret, portid);

		rte_eth_promiscuous_enable(portid);

		if (check_ptype(portid) == 0) {
			rte_eth_add_rx_callback(portid, 0, cb_parse_ptype, NULL);
			printf("Add Rx callback function to detect L3 packet type by SW: port = %d\n",
			       portid);
		}
	}

	if (init_routing_table() < 0)
		rte_exit(EXIT_FAILURE, "Cannot init routing table\n");

	check_all_ports_link_status(nb_ports, enabled_port_mask);

	/* launch per-lcore init on every lcore */
	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}