1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <stdint.h> 37 #include <inttypes.h> 38 #include <sys/types.h> 39 #include <string.h> 40 #include <sys/queue.h> 41 #include <stdarg.h> 42 #include <errno.h> 43 #include <getopt.h> 44 #include <unistd.h> 45 #include <signal.h> 46 47 #include <rte_common.h> 48 #include <rte_byteorder.h> 49 #include <rte_log.h> 50 #include <rte_memory.h> 51 #include <rte_memcpy.h> 52 #include <rte_memzone.h> 53 #include <rte_tailq.h> 54 #include <rte_eal.h> 55 #include <rte_per_lcore.h> 56 #include <rte_launch.h> 57 #include <rte_atomic.h> 58 #include <rte_cycles.h> 59 #include <rte_prefetch.h> 60 #include <rte_lcore.h> 61 #include <rte_per_lcore.h> 62 #include <rte_branch_prediction.h> 63 #include <rte_interrupts.h> 64 #include <rte_pci.h> 65 #include <rte_random.h> 66 #include <rte_debug.h> 67 #include <rte_ether.h> 68 #include <rte_ethdev.h> 69 #include <rte_ring.h> 70 #include <rte_mempool.h> 71 #include <rte_mbuf.h> 72 #include <rte_ip.h> 73 #include <rte_tcp.h> 74 #include <rte_udp.h> 75 #include <rte_string_fns.h> 76 #include <rte_timer.h> 77 #include <rte_power.h> 78 79 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 80 81 #define MAX_PKT_BURST 32 82 83 #define MIN_ZERO_POLL_COUNT 5 84 85 /* around 100ms at 2 Ghz */ 86 #define TIMER_RESOLUTION_CYCLES 200000000ULL 87 /* 100 ms interval */ 88 #define TIMER_NUMBER_PER_SECOND 10 89 /* 100000 us */ 90 #define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND) 91 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25 92 93 #define APP_LOOKUP_EXACT_MATCH 0 94 #define APP_LOOKUP_LPM 1 95 #define DO_RFC_1812_CHECKS 96 97 #ifndef APP_LOOKUP_METHOD 98 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 99 #endif 100 101 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 102 #include <rte_hash.h> 103 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 104 #include <rte_lpm.h> 105 #else 106 #error "APP_LOOKUP_METHOD set to incorrect value" 107 #endif 108 109 #ifndef IPv6_BYTES 110 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ 111 "%02x%02x:%02x%02x:%02x%02x:%02x%02x" 112 #define IPv6_BYTES(addr) \ 113 addr[0], addr[1], addr[2], addr[3], \ 114 addr[4], addr[5], addr[6], addr[7], \ 115 addr[8], addr[9], addr[10], addr[11],\ 116 addr[12], addr[13],addr[14], addr[15] 117 #endif 118 119 #define MAX_JUMBO_PKT_LEN 9600 120 121 #define IPV6_ADDR_LEN 16 122 123 #define MEMPOOL_CACHE_SIZE 256 124 125 #define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) 126 127 /* 128 * This expression is used to calculate the number of mbufs needed depending on 129 * user input, taking into account memory for rx and tx hardware rings, cache 130 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 131 * NB_MBUF never goes below a minimum value of 8192. 132 */ 133 134 #define NB_MBUF RTE_MAX ( \ 135 (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \ 136 nb_ports*nb_lcores*MAX_PKT_BURST + \ 137 nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \ 138 nb_lcores*MEMPOOL_CACHE_SIZE), \ 139 (unsigned)8192) 140 141 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 142 143 #define NB_SOCKETS 8 144 145 /* Configure how many packets ahead to prefetch, when reading packets */ 146 #define PREFETCH_OFFSET 3 147 148 /* 149 * Configurable number of RX/TX ring descriptors 150 */ 151 #define RTE_TEST_RX_DESC_DEFAULT 128 152 #define RTE_TEST_TX_DESC_DEFAULT 512 153 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 154 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 155 156 /* ethernet addresses of ports */ 157 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 158 159 /* mask of enabled ports */ 160 static uint32_t enabled_port_mask = 0; 161 /* Ports set in promiscuous mode off by default. */ 162 static int promiscuous_on = 0; 163 /* NUMA is enabled by default. */ 164 static int numa_on = 1; 165 166 enum freq_scale_hint_t 167 { 168 FREQ_LOWER = -1, 169 FREQ_CURRENT = 0, 170 FREQ_HIGHER = 1, 171 FREQ_HIGHEST = 2 172 }; 173 174 struct mbuf_table { 175 uint16_t len; 176 struct rte_mbuf *m_table[MAX_PKT_BURST]; 177 }; 178 179 struct lcore_rx_queue { 180 uint8_t port_id; 181 uint8_t queue_id; 182 enum freq_scale_hint_t freq_up_hint; 183 uint32_t zero_rx_packet_count; 184 uint32_t idle_hint; 185 } __rte_cache_aligned; 186 187 #define MAX_RX_QUEUE_PER_LCORE 16 188 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 189 #define MAX_RX_QUEUE_PER_PORT 128 190 191 #define MAX_LCORE_PARAMS 1024 192 struct lcore_params { 193 uint8_t port_id; 194 uint8_t queue_id; 195 uint8_t lcore_id; 196 } __rte_cache_aligned; 197 198 static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; 199 static struct lcore_params lcore_params_array_default[] = { 200 {0, 0, 2}, 201 {0, 1, 2}, 202 {0, 2, 2}, 203 {1, 0, 2}, 204 {1, 1, 2}, 205 {1, 2, 2}, 206 {2, 0, 2}, 207 {3, 0, 3}, 208 {3, 1, 3}, 209 }; 210 211 static struct lcore_params * lcore_params = lcore_params_array_default; 212 static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) / 213 sizeof(lcore_params_array_default[0]); 214 215 static struct rte_eth_conf port_conf = { 216 .rxmode = { 217 .mq_mode = ETH_MQ_RX_RSS, 218 .max_rx_pkt_len = ETHER_MAX_LEN, 219 .split_hdr_size = 0, 220 .header_split = 0, /**< Header Split disabled */ 221 .hw_ip_checksum = 1, /**< IP checksum offload enabled */ 222 .hw_vlan_filter = 0, /**< VLAN filtering disabled */ 223 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ 224 .hw_strip_crc = 0, /**< CRC stripped by hardware */ 225 }, 226 .rx_adv_conf = { 227 .rss_conf = { 228 .rss_key = NULL, 229 .rss_hf = ETH_RSS_IP, 230 }, 231 }, 232 .txmode = { 233 .mq_mode = ETH_DCB_NONE, 234 }, 235 }; 236 237 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; 238 239 240 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 241 242 #ifdef RTE_MACHINE_CPUFLAG_SSE4_2 243 #include <rte_hash_crc.h> 244 #define DEFAULT_HASH_FUNC rte_hash_crc 245 #else 246 #include <rte_jhash.h> 247 #define DEFAULT_HASH_FUNC rte_jhash 248 #endif 249 250 struct ipv4_5tuple { 251 uint32_t ip_dst; 252 uint32_t ip_src; 253 uint16_t port_dst; 254 uint16_t port_src; 255 uint8_t proto; 256 } __attribute__((__packed__)); 257 258 struct ipv6_5tuple { 259 uint8_t ip_dst[IPV6_ADDR_LEN]; 260 uint8_t ip_src[IPV6_ADDR_LEN]; 261 uint16_t port_dst; 262 uint16_t port_src; 263 uint8_t proto; 264 } __attribute__((__packed__)); 265 266 struct ipv4_l3fwd_route { 267 struct ipv4_5tuple key; 268 uint8_t if_out; 269 }; 270 271 struct ipv6_l3fwd_route { 272 struct ipv6_5tuple key; 273 uint8_t if_out; 274 }; 275 276 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 277 {{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, 278 {{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, 279 {{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, 280 {{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, 281 }; 282 283 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 284 { 285 { 286 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 287 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, 288 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 289 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, 290 1, 10, IPPROTO_UDP 291 }, 4 292 }, 293 }; 294 295 typedef struct rte_hash lookup_struct_t; 296 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 297 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 298 299 #define L3FWD_HASH_ENTRIES 1024 300 301 #define IPV4_L3FWD_NUM_ROUTES \ 302 (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0])) 303 304 #define IPV6_L3FWD_NUM_ROUTES \ 305 (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0])) 306 307 static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 308 static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 309 #endif 310 311 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 312 struct ipv4_l3fwd_route { 313 uint32_t ip; 314 uint8_t depth; 315 uint8_t if_out; 316 }; 317 318 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 319 {IPv4(1,1,1,0), 24, 0}, 320 {IPv4(2,1,1,0), 24, 1}, 321 {IPv4(3,1,1,0), 24, 2}, 322 {IPv4(4,1,1,0), 24, 3}, 323 {IPv4(5,1,1,0), 24, 4}, 324 {IPv4(6,1,1,0), 24, 5}, 325 {IPv4(7,1,1,0), 24, 6}, 326 {IPv4(8,1,1,0), 24, 7}, 327 }; 328 329 #define IPV4_L3FWD_NUM_ROUTES \ 330 (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0])) 331 332 #define IPV4_L3FWD_LPM_MAX_RULES 1024 333 334 typedef struct rte_lpm lookup_struct_t; 335 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 336 #endif 337 338 struct lcore_conf { 339 uint16_t n_rx_queue; 340 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 341 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 342 struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS]; 343 lookup_struct_t * ipv4_lookup_struct; 344 lookup_struct_t * ipv6_lookup_struct; 345 } __rte_cache_aligned; 346 347 struct lcore_stats { 348 /* total sleep time in ms since last frequency scaling down */ 349 uint32_t sleep_time; 350 /* number of long sleep recently */ 351 uint32_t nb_long_sleep; 352 /* freq. scaling up trend */ 353 uint32_t trend; 354 /* total packet processed recently */ 355 uint64_t nb_rx_processed; 356 /* total iterations looped recently */ 357 uint64_t nb_iteration_looped; 358 uint32_t padding[9]; 359 } __rte_cache_aligned; 360 361 static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned; 362 static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned; 363 static struct rte_timer power_timers[RTE_MAX_LCORE]; 364 365 static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count); 366 static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \ 367 unsigned lcore_id, uint8_t port_id, uint16_t queue_id); 368 369 /* exit signal handler */ 370 static void 371 signal_exit_now(int sigtype) 372 { 373 unsigned lcore_id; 374 int ret; 375 376 if (sigtype == SIGINT) { 377 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 378 if (rte_lcore_is_enabled(lcore_id) == 0) 379 continue; 380 381 /* init power management library */ 382 ret = rte_power_exit(lcore_id); 383 if (ret) 384 rte_exit(EXIT_FAILURE, "Power management " 385 "library de-initialization failed on " 386 "core%u\n", lcore_id); 387 } 388 } 389 390 rte_exit(EXIT_SUCCESS, "User forced exit\n"); 391 } 392 393 /* Freqency scale down timer callback */ 394 static void 395 power_timer_cb(__attribute__((unused)) struct rte_timer *tim, 396 __attribute__((unused)) void *arg) 397 { 398 uint64_t hz; 399 float sleep_time_ratio; 400 unsigned lcore_id = rte_lcore_id(); 401 402 /* accumulate total execution time in us when callback is invoked */ 403 sleep_time_ratio = (float)(stats[lcore_id].sleep_time) / 404 (float)SCALING_PERIOD; 405 406 /** 407 * check whether need to scale down frequency a step if it sleep a lot. 408 */ 409 if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) 410 rte_power_freq_down(lcore_id); 411 else if ( (unsigned)(stats[lcore_id].nb_rx_processed / 412 stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) 413 /** 414 * scale down a step if average packet per iteration less 415 * than expectation. 416 */ 417 rte_power_freq_down(lcore_id); 418 419 /** 420 * initialize another timer according to current frequency to ensure 421 * timer interval is relatively fixed. 422 */ 423 hz = rte_get_timer_hz(); 424 rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND, 425 SINGLE, lcore_id, power_timer_cb, NULL); 426 427 stats[lcore_id].nb_rx_processed = 0; 428 stats[lcore_id].nb_iteration_looped = 0; 429 430 stats[lcore_id].sleep_time = 0; 431 } 432 433 /* Send burst of packets on an output interface */ 434 static inline int 435 send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port) 436 { 437 struct rte_mbuf **m_table; 438 int ret; 439 uint16_t queueid; 440 441 queueid = qconf->tx_queue_id[port]; 442 m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; 443 444 ret = rte_eth_tx_burst(port, queueid, m_table, n); 445 if (unlikely(ret < n)) { 446 do { 447 rte_pktmbuf_free(m_table[ret]); 448 } while (++ret < n); 449 } 450 451 return 0; 452 } 453 454 /* Enqueue a single packet, and send burst if queue is filled */ 455 static inline int 456 send_single_packet(struct rte_mbuf *m, uint8_t port) 457 { 458 uint32_t lcore_id; 459 uint16_t len; 460 struct lcore_conf *qconf; 461 462 lcore_id = rte_lcore_id(); 463 464 qconf = &lcore_conf[lcore_id]; 465 len = qconf->tx_mbufs[port].len; 466 qconf->tx_mbufs[port].m_table[len] = m; 467 len++; 468 469 /* enough pkts to be sent */ 470 if (unlikely(len == MAX_PKT_BURST)) { 471 send_burst(qconf, MAX_PKT_BURST, port); 472 len = 0; 473 } 474 475 qconf->tx_mbufs[port].len = len; 476 return 0; 477 } 478 479 #ifdef DO_RFC_1812_CHECKS 480 static inline int 481 is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len) 482 { 483 /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ 484 /* 485 * 1. The packet length reported by the Link Layer must be large 486 * enough to hold the minimum length legal IP datagram (20 bytes). 487 */ 488 if (link_len < sizeof(struct ipv4_hdr)) 489 return -1; 490 491 /* 2. The IP checksum must be correct. */ 492 /* this is checked in H/W */ 493 494 /* 495 * 3. The IP version number must be 4. If the version number is not 4 496 * then the packet may be another version of IP, such as IPng or 497 * ST-II. 498 */ 499 if (((pkt->version_ihl) >> 4) != 4) 500 return -3; 501 /* 502 * 4. The IP header length field must be large enough to hold the 503 * minimum length legal IP datagram (20 bytes = 5 words). 504 */ 505 if ((pkt->version_ihl & 0xf) < 5) 506 return -4; 507 508 /* 509 * 5. The IP total length field must be large enough to hold the IP 510 * datagram header, whose length is specified in the IP header length 511 * field. 512 */ 513 if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr)) 514 return -5; 515 516 return 0; 517 } 518 #endif 519 520 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 521 static void 522 print_ipv4_key(struct ipv4_5tuple key) 523 { 524 printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, " 525 "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src, 526 key.port_dst, key.port_src, key.proto); 527 } 528 static void 529 print_ipv6_key(struct ipv6_5tuple key) 530 { 531 printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", " 532 "port dst = %d, port src = %d, proto = %d\n", 533 IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src), 534 key.port_dst, key.port_src, key.proto); 535 } 536 537 static inline uint8_t 538 get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid, 539 lookup_struct_t * ipv4_l3fwd_lookup_struct) 540 { 541 struct ipv4_5tuple key; 542 struct tcp_hdr *tcp; 543 struct udp_hdr *udp; 544 int ret = 0; 545 546 key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); 547 key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); 548 key.proto = ipv4_hdr->next_proto_id; 549 550 switch (ipv4_hdr->next_proto_id) { 551 case IPPROTO_TCP: 552 tcp = (struct tcp_hdr *)((unsigned char *)ipv4_hdr + 553 sizeof(struct ipv4_hdr)); 554 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 555 key.port_src = rte_be_to_cpu_16(tcp->src_port); 556 break; 557 558 case IPPROTO_UDP: 559 udp = (struct udp_hdr *)((unsigned char *)ipv4_hdr + 560 sizeof(struct ipv4_hdr)); 561 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 562 key.port_src = rte_be_to_cpu_16(udp->src_port); 563 break; 564 565 default: 566 key.port_dst = 0; 567 key.port_src = 0; 568 break; 569 } 570 571 /* Find destination port */ 572 ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); 573 return (uint8_t)((ret < 0)? portid : ipv4_l3fwd_out_if[ret]); 574 } 575 576 static inline uint8_t 577 get_ipv6_dst_port(struct ipv6_hdr *ipv6_hdr, uint8_t portid, 578 lookup_struct_t *ipv6_l3fwd_lookup_struct) 579 { 580 struct ipv6_5tuple key; 581 struct tcp_hdr *tcp; 582 struct udp_hdr *udp; 583 int ret = 0; 584 585 memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); 586 memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); 587 588 key.proto = ipv6_hdr->proto; 589 590 switch (ipv6_hdr->proto) { 591 case IPPROTO_TCP: 592 tcp = (struct tcp_hdr *)((unsigned char *) ipv6_hdr + 593 sizeof(struct ipv6_hdr)); 594 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 595 key.port_src = rte_be_to_cpu_16(tcp->src_port); 596 break; 597 598 case IPPROTO_UDP: 599 udp = (struct udp_hdr *)((unsigned char *) ipv6_hdr + 600 sizeof(struct ipv6_hdr)); 601 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 602 key.port_src = rte_be_to_cpu_16(udp->src_port); 603 break; 604 605 default: 606 key.port_dst = 0; 607 key.port_src = 0; 608 break; 609 } 610 611 /* Find destination port */ 612 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 613 return (uint8_t)((ret < 0)? portid : ipv6_l3fwd_out_if[ret]); 614 } 615 #endif 616 617 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 618 static inline uint8_t 619 get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid, 620 lookup_struct_t *ipv4_l3fwd_lookup_struct) 621 { 622 uint8_t next_hop; 623 624 return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, 625 rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? 626 next_hop : portid); 627 } 628 #endif 629 630 static inline void 631 l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, 632 struct lcore_conf *qconf) 633 { 634 struct ether_hdr *eth_hdr; 635 struct ipv4_hdr *ipv4_hdr; 636 void *d_addr_bytes; 637 uint8_t dst_port; 638 639 eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); 640 641 if (m->ol_flags & PKT_RX_IPV4_HDR) { 642 /* Handle IPv4 headers.*/ 643 ipv4_hdr = 644 (struct ipv4_hdr *)(rte_pktmbuf_mtod(m, unsigned char*) 645 + sizeof(struct ether_hdr)); 646 647 #ifdef DO_RFC_1812_CHECKS 648 /* Check to make sure the packet is valid (RFC1812) */ 649 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { 650 rte_pktmbuf_free(m); 651 return; 652 } 653 #endif 654 655 dst_port = get_ipv4_dst_port(ipv4_hdr, portid, 656 qconf->ipv4_lookup_struct); 657 if (dst_port >= RTE_MAX_ETHPORTS || 658 (enabled_port_mask & 1 << dst_port) == 0) 659 dst_port = portid; 660 661 /* 02:00:00:00:00:xx */ 662 d_addr_bytes = ð_hdr->d_addr.addr_bytes[0]; 663 *((uint64_t *)d_addr_bytes) = 664 0x000000000002 + ((uint64_t)dst_port << 40); 665 666 #ifdef DO_RFC_1812_CHECKS 667 /* Update time to live and header checksum */ 668 --(ipv4_hdr->time_to_live); 669 ++(ipv4_hdr->hdr_checksum); 670 #endif 671 672 /* src addr */ 673 ether_addr_copy(&ports_eth_addr[dst_port], ð_hdr->s_addr); 674 675 send_single_packet(m, dst_port); 676 } 677 else { 678 /* Handle IPv6 headers.*/ 679 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 680 struct ipv6_hdr *ipv6_hdr; 681 682 ipv6_hdr = 683 (struct ipv6_hdr *)(rte_pktmbuf_mtod(m, unsigned char*) 684 + sizeof(struct ether_hdr)); 685 686 dst_port = get_ipv6_dst_port(ipv6_hdr, portid, 687 qconf->ipv6_lookup_struct); 688 689 if (dst_port >= RTE_MAX_ETHPORTS || 690 (enabled_port_mask & 1 << dst_port) == 0) 691 dst_port = portid; 692 693 /* 02:00:00:00:00:xx */ 694 d_addr_bytes = ð_hdr->d_addr.addr_bytes[0]; 695 *((uint64_t *)d_addr_bytes) = 696 0x000000000002 + ((uint64_t)dst_port << 40); 697 698 /* src addr */ 699 ether_addr_copy(&ports_eth_addr[dst_port], ð_hdr->s_addr); 700 701 send_single_packet(m, dst_port); 702 #else 703 /* We don't currently handle IPv6 packets in LPM mode. */ 704 rte_pktmbuf_free(m); 705 #endif 706 } 707 708 } 709 710 #define SLEEP_GEAR1_THRESHOLD 100 711 #define SLEEP_GEAR2_THRESHOLD 1000 712 713 static inline uint32_t 714 power_idle_heuristic(uint32_t zero_rx_packet_count) 715 { 716 /* If zero count is less than 100, use it as the sleep time in us */ 717 if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD) 718 return zero_rx_packet_count; 719 /* If zero count is less than 1000, sleep time should be 100 us */ 720 else if ((zero_rx_packet_count >= SLEEP_GEAR1_THRESHOLD) && 721 (zero_rx_packet_count < SLEEP_GEAR2_THRESHOLD)) 722 return SLEEP_GEAR1_THRESHOLD; 723 /* If zero count is greater than 1000, sleep time should be 1000 us */ 724 else if (zero_rx_packet_count >= SLEEP_GEAR2_THRESHOLD) 725 return SLEEP_GEAR2_THRESHOLD; 726 727 return 0; 728 } 729 730 static inline enum freq_scale_hint_t 731 power_freq_scaleup_heuristic(unsigned lcore_id, 732 uint8_t port_id, 733 uint16_t queue_id) 734 { 735 /** 736 * HW Rx queue size is 128 by default, Rx burst read at maximum 32 entries 737 * per iteration 738 */ 739 #define FREQ_GEAR1_RX_PACKET_THRESHOLD MAX_PKT_BURST 740 #define FREQ_GEAR2_RX_PACKET_THRESHOLD (MAX_PKT_BURST*2) 741 #define FREQ_GEAR3_RX_PACKET_THRESHOLD (MAX_PKT_BURST*3) 742 #define FREQ_UP_TREND1_ACC 1 743 #define FREQ_UP_TREND2_ACC 100 744 #define FREQ_UP_THRESHOLD 10000 745 746 if (likely(rte_eth_rx_descriptor_done(port_id, queue_id, 747 FREQ_GEAR3_RX_PACKET_THRESHOLD) > 0)) { 748 stats[lcore_id].trend = 0; 749 return FREQ_HIGHEST; 750 } else if (likely(rte_eth_rx_descriptor_done(port_id, queue_id, 751 FREQ_GEAR2_RX_PACKET_THRESHOLD) > 0)) 752 stats[lcore_id].trend += FREQ_UP_TREND2_ACC; 753 else if (likely(rte_eth_rx_descriptor_done(port_id, queue_id, 754 FREQ_GEAR1_RX_PACKET_THRESHOLD) > 0)) 755 stats[lcore_id].trend += FREQ_UP_TREND1_ACC; 756 757 if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) { 758 stats[lcore_id].trend = 0; 759 return FREQ_HIGHER; 760 } 761 762 return FREQ_CURRENT; 763 } 764 765 /* main processing loop */ 766 static int 767 main_loop(__attribute__((unused)) void *dummy) 768 { 769 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 770 unsigned lcore_id; 771 uint64_t prev_tsc, diff_tsc, cur_tsc; 772 uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power; 773 int i, j, nb_rx; 774 uint8_t portid, queueid; 775 struct lcore_conf *qconf; 776 struct lcore_rx_queue *rx_queue; 777 enum freq_scale_hint_t lcore_scaleup_hint; 778 779 uint32_t lcore_rx_idle_count = 0; 780 uint32_t lcore_idle_hint = 0; 781 782 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; 783 784 prev_tsc = 0; 785 786 lcore_id = rte_lcore_id(); 787 qconf = &lcore_conf[lcore_id]; 788 789 if (qconf->n_rx_queue == 0) { 790 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); 791 return 0; 792 } 793 794 RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id); 795 796 for (i = 0; i < qconf->n_rx_queue; i++) { 797 798 portid = qconf->rx_queue_list[i].port_id; 799 queueid = qconf->rx_queue_list[i].queue_id; 800 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%hhu " 801 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 802 } 803 804 while (1) { 805 stats[lcore_id].nb_iteration_looped++; 806 807 cur_tsc = rte_rdtsc(); 808 cur_tsc_power = cur_tsc; 809 810 /* 811 * TX burst queue drain 812 */ 813 diff_tsc = cur_tsc - prev_tsc; 814 if (unlikely(diff_tsc > drain_tsc)) { 815 816 /* 817 * This could be optimized (use queueid instead of 818 * portid), but it is not called so often 819 */ 820 for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { 821 if (qconf->tx_mbufs[portid].len == 0) 822 continue; 823 send_burst(&lcore_conf[lcore_id], 824 qconf->tx_mbufs[portid].len, 825 portid); 826 qconf->tx_mbufs[portid].len = 0; 827 } 828 829 prev_tsc = cur_tsc; 830 } 831 832 diff_tsc_power = cur_tsc_power - prev_tsc_power; 833 if (diff_tsc_power > TIMER_RESOLUTION_CYCLES) { 834 rte_timer_manage(); 835 prev_tsc_power = cur_tsc_power; 836 } 837 838 /* 839 * Read packet from RX queues 840 */ 841 lcore_scaleup_hint = FREQ_CURRENT; 842 lcore_rx_idle_count = 0; 843 for (i = 0; i < qconf->n_rx_queue; ++i) { 844 rx_queue = &(qconf->rx_queue_list[i]); 845 rx_queue->idle_hint = 0; 846 portid = rx_queue->port_id; 847 queueid = rx_queue->queue_id; 848 849 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 850 MAX_PKT_BURST); 851 stats[lcore_id].nb_rx_processed += nb_rx; 852 if (unlikely(nb_rx == 0)) { 853 /** 854 * no packet received from rx queue, try to 855 * sleep for a while forcing CPU enter deeper 856 * C states. 857 */ 858 rx_queue->zero_rx_packet_count++; 859 860 if (rx_queue->zero_rx_packet_count <= 861 MIN_ZERO_POLL_COUNT) 862 continue; 863 864 rx_queue->idle_hint = power_idle_heuristic(\ 865 rx_queue->zero_rx_packet_count); 866 lcore_rx_idle_count++; 867 } else { 868 rx_queue->zero_rx_packet_count = 0; 869 870 /** 871 * do not scale up frequency immediately as 872 * user to kernel space communication is costly 873 * which might impact packet I/O for received 874 * packets. 875 */ 876 rx_queue->freq_up_hint = 877 power_freq_scaleup_heuristic(lcore_id, 878 portid, queueid); 879 } 880 881 /* Prefetch first packets */ 882 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 883 rte_prefetch0(rte_pktmbuf_mtod( 884 pkts_burst[j], void *)); 885 } 886 887 /* Prefetch and forward already prefetched packets */ 888 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 889 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 890 j + PREFETCH_OFFSET], void *)); 891 l3fwd_simple_forward(pkts_burst[j], portid, 892 qconf); 893 } 894 895 /* Forward remaining prefetched packets */ 896 for (; j < nb_rx; j++) { 897 l3fwd_simple_forward(pkts_burst[j], portid, 898 qconf); 899 } 900 } 901 902 if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) { 903 for (i = 1, lcore_scaleup_hint = 904 qconf->rx_queue_list[0].freq_up_hint; 905 i < qconf->n_rx_queue; ++i) { 906 rx_queue = &(qconf->rx_queue_list[i]); 907 if (rx_queue->freq_up_hint > 908 lcore_scaleup_hint) 909 lcore_scaleup_hint = 910 rx_queue->freq_up_hint; 911 } 912 913 if (lcore_scaleup_hint == FREQ_HIGHEST) 914 rte_power_freq_max(lcore_id); 915 else if (lcore_scaleup_hint == FREQ_HIGHER) 916 rte_power_freq_up(lcore_id); 917 } else { 918 /** 919 * All Rx queues empty in recent consecutive polls, 920 * sleep in a conservative manner, meaning sleep as 921 * less as possible. 922 */ 923 for (i = 1, lcore_idle_hint = 924 qconf->rx_queue_list[0].idle_hint; 925 i < qconf->n_rx_queue; ++i) { 926 rx_queue = &(qconf->rx_queue_list[i]); 927 if (rx_queue->idle_hint < lcore_idle_hint) 928 lcore_idle_hint = rx_queue->idle_hint; 929 } 930 931 if ( lcore_idle_hint < SLEEP_GEAR1_THRESHOLD) 932 /** 933 * execute "pause" instruction to avoid context 934 * switch for short sleep. 935 */ 936 rte_delay_us(lcore_idle_hint); 937 else 938 /* long sleep force runing thread to suspend */ 939 usleep(lcore_idle_hint); 940 941 stats[lcore_id].sleep_time += lcore_idle_hint; 942 } 943 } 944 } 945 946 static int 947 check_lcore_params(void) 948 { 949 uint8_t queue, lcore; 950 uint16_t i; 951 int socketid; 952 953 for (i = 0; i < nb_lcore_params; ++i) { 954 queue = lcore_params[i].queue_id; 955 if (queue >= MAX_RX_QUEUE_PER_PORT) { 956 printf("invalid queue number: %hhu\n", queue); 957 return -1; 958 } 959 lcore = lcore_params[i].lcore_id; 960 if (!rte_lcore_is_enabled(lcore)) { 961 printf("error: lcore %hhu is not enabled in lcore " 962 "mask\n", lcore); 963 return -1; 964 } 965 if ((socketid = rte_lcore_to_socket_id(lcore) != 0) && 966 (numa_on == 0)) { 967 printf("warning: lcore %hhu is on socket %d with numa " 968 "off\n", lcore, socketid); 969 } 970 } 971 return 0; 972 } 973 974 static int 975 check_port_config(const unsigned nb_ports) 976 { 977 unsigned portid; 978 uint16_t i; 979 980 for (i = 0; i < nb_lcore_params; ++i) { 981 portid = lcore_params[i].port_id; 982 if ((enabled_port_mask & (1 << portid)) == 0) { 983 printf("port %u is not enabled in port mask\n", 984 portid); 985 return -1; 986 } 987 if (portid >= nb_ports) { 988 printf("port %u is not present on the board\n", 989 portid); 990 return -1; 991 } 992 } 993 return 0; 994 } 995 996 static uint8_t 997 get_port_n_rx_queues(const uint8_t port) 998 { 999 int queue = -1; 1000 uint16_t i; 1001 1002 for (i = 0; i < nb_lcore_params; ++i) { 1003 if (lcore_params[i].port_id == port && 1004 lcore_params[i].queue_id > queue) 1005 queue = lcore_params[i].queue_id; 1006 } 1007 return (uint8_t)(++queue); 1008 } 1009 1010 static int 1011 init_lcore_rx_queues(void) 1012 { 1013 uint16_t i, nb_rx_queue; 1014 uint8_t lcore; 1015 1016 for (i = 0; i < nb_lcore_params; ++i) { 1017 lcore = lcore_params[i].lcore_id; 1018 nb_rx_queue = lcore_conf[lcore].n_rx_queue; 1019 if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { 1020 printf("error: too many queues (%u) for lcore: %u\n", 1021 (unsigned)nb_rx_queue + 1, (unsigned)lcore); 1022 return -1; 1023 } else { 1024 lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = 1025 lcore_params[i].port_id; 1026 lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = 1027 lcore_params[i].queue_id; 1028 lcore_conf[lcore].n_rx_queue++; 1029 } 1030 } 1031 return 0; 1032 } 1033 1034 /* display usage */ 1035 static void 1036 print_usage(const char *prgname) 1037 { 1038 printf ("%s [EAL options] -- -p PORTMASK -P" 1039 " [--config (port,queue,lcore)[,(port,queue,lcore]]" 1040 " [--enable-jumbo [--max-pkt-len PKTLEN]]\n" 1041 " -p PORTMASK: hexadecimal bitmask of ports to configure\n" 1042 " -P : enable promiscuous mode\n" 1043 " --config (port,queue,lcore): rx queues configuration\n" 1044 " --no-numa: optional, disable numa awareness\n" 1045 " --enable-jumbo: enable jumbo frame" 1046 " which max packet len is PKTLEN in decimal (64-9600)\n", 1047 prgname); 1048 } 1049 1050 static int parse_max_pkt_len(const char *pktlen) 1051 { 1052 char *end = NULL; 1053 unsigned long len; 1054 1055 /* parse decimal string */ 1056 len = strtoul(pktlen, &end, 10); 1057 if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0')) 1058 return -1; 1059 1060 if (len == 0) 1061 return -1; 1062 1063 return len; 1064 } 1065 1066 static int 1067 parse_portmask(const char *portmask) 1068 { 1069 char *end = NULL; 1070 unsigned long pm; 1071 1072 /* parse hexadecimal string */ 1073 pm = strtoul(portmask, &end, 16); 1074 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 1075 return -1; 1076 1077 if (pm == 0) 1078 return -1; 1079 1080 return pm; 1081 } 1082 1083 static int 1084 parse_config(const char *q_arg) 1085 { 1086 char s[256]; 1087 const char *p, *p0 = q_arg; 1088 char *end; 1089 enum fieldnames { 1090 FLD_PORT = 0, 1091 FLD_QUEUE, 1092 FLD_LCORE, 1093 _NUM_FLD 1094 }; 1095 unsigned long int_fld[_NUM_FLD]; 1096 char *str_fld[_NUM_FLD]; 1097 int i; 1098 unsigned size; 1099 1100 nb_lcore_params = 0; 1101 1102 while ((p = strchr(p0,'(')) != NULL) { 1103 ++p; 1104 if((p0 = strchr(p,')')) == NULL) 1105 return -1; 1106 1107 size = p0 - p; 1108 if(size >= sizeof(s)) 1109 return -1; 1110 1111 snprintf(s, sizeof(s), "%.*s", size, p); 1112 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != 1113 _NUM_FLD) 1114 return -1; 1115 for (i = 0; i < _NUM_FLD; i++){ 1116 errno = 0; 1117 int_fld[i] = strtoul(str_fld[i], &end, 0); 1118 if (errno != 0 || end == str_fld[i] || int_fld[i] > 1119 255) 1120 return -1; 1121 } 1122 if (nb_lcore_params >= MAX_LCORE_PARAMS) { 1123 printf("exceeded max number of lcore params: %hu\n", 1124 nb_lcore_params); 1125 return -1; 1126 } 1127 lcore_params_array[nb_lcore_params].port_id = 1128 (uint8_t)int_fld[FLD_PORT]; 1129 lcore_params_array[nb_lcore_params].queue_id = 1130 (uint8_t)int_fld[FLD_QUEUE]; 1131 lcore_params_array[nb_lcore_params].lcore_id = 1132 (uint8_t)int_fld[FLD_LCORE]; 1133 ++nb_lcore_params; 1134 } 1135 lcore_params = lcore_params_array; 1136 1137 return 0; 1138 } 1139 1140 /* Parse the argument given in the command line of the application */ 1141 static int 1142 parse_args(int argc, char **argv) 1143 { 1144 int opt, ret; 1145 char **argvopt; 1146 int option_index; 1147 char *prgname = argv[0]; 1148 static struct option lgopts[] = { 1149 {"config", 1, 0, 0}, 1150 {"no-numa", 0, 0, 0}, 1151 {"enable-jumbo", 0, 0, 0}, 1152 {NULL, 0, 0, 0} 1153 }; 1154 1155 argvopt = argv; 1156 1157 while ((opt = getopt_long(argc, argvopt, "p:P", 1158 lgopts, &option_index)) != EOF) { 1159 1160 switch (opt) { 1161 /* portmask */ 1162 case 'p': 1163 enabled_port_mask = parse_portmask(optarg); 1164 if (enabled_port_mask == 0) { 1165 printf("invalid portmask\n"); 1166 print_usage(prgname); 1167 return -1; 1168 } 1169 break; 1170 case 'P': 1171 printf("Promiscuous mode selected\n"); 1172 promiscuous_on = 1; 1173 break; 1174 1175 /* long options */ 1176 case 0: 1177 if (!strncmp(lgopts[option_index].name, "config", 6)) { 1178 ret = parse_config(optarg); 1179 if (ret) { 1180 printf("invalid config\n"); 1181 print_usage(prgname); 1182 return -1; 1183 } 1184 } 1185 1186 if (!strncmp(lgopts[option_index].name, 1187 "no-numa", 7)) { 1188 printf("numa is disabled \n"); 1189 numa_on = 0; 1190 } 1191 1192 if (!strncmp(lgopts[option_index].name, 1193 "enable-jumbo", 12)) { 1194 struct option lenopts = 1195 {"max-pkt-len", required_argument, \ 1196 0, 0}; 1197 1198 printf("jumbo frame is enabled \n"); 1199 port_conf.rxmode.jumbo_frame = 1; 1200 1201 /** 1202 * if no max-pkt-len set, use the default value 1203 * ETHER_MAX_LEN 1204 */ 1205 if (0 == getopt_long(argc, argvopt, "", 1206 &lenopts, &option_index)) { 1207 ret = parse_max_pkt_len(optarg); 1208 if ((ret < 64) || 1209 (ret > MAX_JUMBO_PKT_LEN)){ 1210 printf("invalid packet " 1211 "length\n"); 1212 print_usage(prgname); 1213 return -1; 1214 } 1215 port_conf.rxmode.max_rx_pkt_len = ret; 1216 } 1217 printf("set jumbo frame " 1218 "max packet length to %u\n", 1219 (unsigned int)port_conf.rxmode.max_rx_pkt_len); 1220 } 1221 1222 break; 1223 1224 default: 1225 print_usage(prgname); 1226 return -1; 1227 } 1228 } 1229 1230 if (optind >= 0) 1231 argv[optind-1] = prgname; 1232 1233 ret = optind-1; 1234 optind = 0; /* reset getopt lib */ 1235 return ret; 1236 } 1237 1238 static void 1239 print_ethaddr(const char *name, const struct ether_addr *eth_addr) 1240 { 1241 char buf[ETHER_ADDR_FMT_SIZE]; 1242 ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); 1243 printf("%s%s", name, buf); 1244 } 1245 1246 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1247 static void 1248 setup_hash(int socketid) 1249 { 1250 struct rte_hash_parameters ipv4_l3fwd_hash_params = { 1251 .name = NULL, 1252 .entries = L3FWD_HASH_ENTRIES, 1253 .bucket_entries = 4, 1254 .key_len = sizeof(struct ipv4_5tuple), 1255 .hash_func = DEFAULT_HASH_FUNC, 1256 .hash_func_init_val = 0, 1257 }; 1258 1259 struct rte_hash_parameters ipv6_l3fwd_hash_params = { 1260 .name = NULL, 1261 .entries = L3FWD_HASH_ENTRIES, 1262 .bucket_entries = 4, 1263 .key_len = sizeof(struct ipv6_5tuple), 1264 .hash_func = DEFAULT_HASH_FUNC, 1265 .hash_func_init_val = 0, 1266 }; 1267 1268 unsigned i; 1269 int ret; 1270 char s[64]; 1271 1272 /* create ipv4 hash */ 1273 snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); 1274 ipv4_l3fwd_hash_params.name = s; 1275 ipv4_l3fwd_hash_params.socket_id = socketid; 1276 ipv4_l3fwd_lookup_struct[socketid] = 1277 rte_hash_create(&ipv4_l3fwd_hash_params); 1278 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 1279 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 1280 "socket %d\n", socketid); 1281 1282 /* create ipv6 hash */ 1283 snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); 1284 ipv6_l3fwd_hash_params.name = s; 1285 ipv6_l3fwd_hash_params.socket_id = socketid; 1286 ipv6_l3fwd_lookup_struct[socketid] = 1287 rte_hash_create(&ipv6_l3fwd_hash_params); 1288 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 1289 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 1290 "socket %d\n", socketid); 1291 1292 1293 /* populate the ipv4 hash */ 1294 for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { 1295 ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid], 1296 (void *) &ipv4_l3fwd_route_array[i].key); 1297 if (ret < 0) { 1298 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 1299 "l3fwd hash on socket %d\n", i, socketid); 1300 } 1301 ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out; 1302 printf("Hash: Adding key\n"); 1303 print_ipv4_key(ipv4_l3fwd_route_array[i].key); 1304 } 1305 1306 /* populate the ipv6 hash */ 1307 for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) { 1308 ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid], 1309 (void *) &ipv6_l3fwd_route_array[i].key); 1310 if (ret < 0) { 1311 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 1312 "l3fwd hash on socket %d\n", i, socketid); 1313 } 1314 ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out; 1315 printf("Hash: Adding key\n"); 1316 print_ipv6_key(ipv6_l3fwd_route_array[i].key); 1317 } 1318 } 1319 #endif 1320 1321 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 1322 static void 1323 setup_lpm(int socketid) 1324 { 1325 unsigned i; 1326 int ret; 1327 char s[64]; 1328 1329 /* create the LPM table */ 1330 snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); 1331 ipv4_l3fwd_lookup_struct[socketid] = rte_lpm_create(s, socketid, 1332 IPV4_L3FWD_LPM_MAX_RULES, 0); 1333 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 1334 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 1335 " on socket %d\n", socketid); 1336 1337 /* populate the LPM table */ 1338 for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { 1339 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], 1340 ipv4_l3fwd_route_array[i].ip, 1341 ipv4_l3fwd_route_array[i].depth, 1342 ipv4_l3fwd_route_array[i].if_out); 1343 1344 if (ret < 0) { 1345 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 1346 "l3fwd LPM table on socket %d\n", 1347 i, socketid); 1348 } 1349 1350 printf("LPM: Adding route 0x%08x / %d (%d)\n", 1351 (unsigned)ipv4_l3fwd_route_array[i].ip, 1352 ipv4_l3fwd_route_array[i].depth, 1353 ipv4_l3fwd_route_array[i].if_out); 1354 } 1355 } 1356 #endif 1357 1358 static int 1359 init_mem(unsigned nb_mbuf) 1360 { 1361 struct lcore_conf *qconf; 1362 int socketid; 1363 unsigned lcore_id; 1364 char s[64]; 1365 1366 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 1367 if (rte_lcore_is_enabled(lcore_id) == 0) 1368 continue; 1369 1370 if (numa_on) 1371 socketid = rte_lcore_to_socket_id(lcore_id); 1372 else 1373 socketid = 0; 1374 1375 if (socketid >= NB_SOCKETS) { 1376 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is " 1377 "out of range %d\n", socketid, 1378 lcore_id, NB_SOCKETS); 1379 } 1380 if (pktmbuf_pool[socketid] == NULL) { 1381 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 1382 pktmbuf_pool[socketid] = 1383 rte_mempool_create(s, nb_mbuf, 1384 MBUF_SIZE, MEMPOOL_CACHE_SIZE, 1385 sizeof(struct rte_pktmbuf_pool_private), 1386 rte_pktmbuf_pool_init, NULL, 1387 rte_pktmbuf_init, NULL, 1388 socketid, 0); 1389 if (pktmbuf_pool[socketid] == NULL) 1390 rte_exit(EXIT_FAILURE, 1391 "Cannot init mbuf pool on socket %d\n", 1392 socketid); 1393 else 1394 printf("Allocated mbuf pool on socket %d\n", 1395 socketid); 1396 1397 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 1398 setup_lpm(socketid); 1399 #else 1400 setup_hash(socketid); 1401 #endif 1402 } 1403 qconf = &lcore_conf[lcore_id]; 1404 qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid]; 1405 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1406 qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid]; 1407 #endif 1408 } 1409 return 0; 1410 } 1411 1412 /* Check the link status of all ports in up to 9s, and print them finally */ 1413 static void 1414 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) 1415 { 1416 #define CHECK_INTERVAL 100 /* 100ms */ 1417 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 1418 uint8_t portid, count, all_ports_up, print_flag = 0; 1419 struct rte_eth_link link; 1420 1421 printf("\nChecking link status"); 1422 fflush(stdout); 1423 for (count = 0; count <= MAX_CHECK_TIME; count++) { 1424 all_ports_up = 1; 1425 for (portid = 0; portid < port_num; portid++) { 1426 if ((port_mask & (1 << portid)) == 0) 1427 continue; 1428 memset(&link, 0, sizeof(link)); 1429 rte_eth_link_get_nowait(portid, &link); 1430 /* print link status if flag set */ 1431 if (print_flag == 1) { 1432 if (link.link_status) 1433 printf("Port %d Link Up - speed %u " 1434 "Mbps - %s\n", (uint8_t)portid, 1435 (unsigned)link.link_speed, 1436 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 1437 ("full-duplex") : ("half-duplex\n")); 1438 else 1439 printf("Port %d Link Down\n", 1440 (uint8_t)portid); 1441 continue; 1442 } 1443 /* clear all_ports_up flag if any link down */ 1444 if (link.link_status == 0) { 1445 all_ports_up = 0; 1446 break; 1447 } 1448 } 1449 /* after finally printing all link status, get out */ 1450 if (print_flag == 1) 1451 break; 1452 1453 if (all_ports_up == 0) { 1454 printf("."); 1455 fflush(stdout); 1456 rte_delay_ms(CHECK_INTERVAL); 1457 } 1458 1459 /* set the print_flag if all ports up or timeout */ 1460 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 1461 print_flag = 1; 1462 printf("done\n"); 1463 } 1464 } 1465 } 1466 1467 int 1468 main(int argc, char **argv) 1469 { 1470 struct lcore_conf *qconf; 1471 struct rte_eth_dev_info dev_info; 1472 struct rte_eth_txconf *txconf; 1473 int ret; 1474 unsigned nb_ports; 1475 uint16_t queueid; 1476 unsigned lcore_id; 1477 uint64_t hz; 1478 uint32_t n_tx_queue, nb_lcores; 1479 uint8_t portid, nb_rx_queue, queue, socketid; 1480 1481 /* catch SIGINT and restore cpufreq governor to ondemand */ 1482 signal(SIGINT, signal_exit_now); 1483 1484 /* init EAL */ 1485 ret = rte_eal_init(argc, argv); 1486 if (ret < 0) 1487 rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); 1488 argc -= ret; 1489 argv += ret; 1490 1491 /* init RTE timer library to be used late */ 1492 rte_timer_subsystem_init(); 1493 1494 /* parse application arguments (after the EAL ones) */ 1495 ret = parse_args(argc, argv); 1496 if (ret < 0) 1497 rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); 1498 1499 if (check_lcore_params() < 0) 1500 rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); 1501 1502 ret = init_lcore_rx_queues(); 1503 if (ret < 0) 1504 rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n"); 1505 1506 1507 nb_ports = rte_eth_dev_count(); 1508 if (nb_ports > RTE_MAX_ETHPORTS) 1509 nb_ports = RTE_MAX_ETHPORTS; 1510 1511 if (check_port_config(nb_ports) < 0) 1512 rte_exit(EXIT_FAILURE, "check_port_config failed\n"); 1513 1514 nb_lcores = rte_lcore_count(); 1515 1516 /* initialize all ports */ 1517 for (portid = 0; portid < nb_ports; portid++) { 1518 /* skip ports that are not enabled */ 1519 if ((enabled_port_mask & (1 << portid)) == 0) { 1520 printf("\nSkipping disabled port %d\n", portid); 1521 continue; 1522 } 1523 1524 /* init port */ 1525 printf("Initializing port %d ... ", portid ); 1526 fflush(stdout); 1527 1528 nb_rx_queue = get_port_n_rx_queues(portid); 1529 n_tx_queue = nb_lcores; 1530 if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) 1531 n_tx_queue = MAX_TX_QUEUE_PER_PORT; 1532 printf("Creating queues: nb_rxq=%d nb_txq=%u... ", 1533 nb_rx_queue, (unsigned)n_tx_queue ); 1534 ret = rte_eth_dev_configure(portid, nb_rx_queue, 1535 (uint16_t)n_tx_queue, &port_conf); 1536 if (ret < 0) 1537 rte_exit(EXIT_FAILURE, "Cannot configure device: " 1538 "err=%d, port=%d\n", ret, portid); 1539 1540 rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 1541 print_ethaddr(" Address:", &ports_eth_addr[portid]); 1542 printf(", "); 1543 1544 /* init memory */ 1545 ret = init_mem(NB_MBUF); 1546 if (ret < 0) 1547 rte_exit(EXIT_FAILURE, "init_mem failed\n"); 1548 1549 /* init one TX queue per couple (lcore,port) */ 1550 queueid = 0; 1551 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 1552 if (rte_lcore_is_enabled(lcore_id) == 0) 1553 continue; 1554 1555 if (numa_on) 1556 socketid = \ 1557 (uint8_t)rte_lcore_to_socket_id(lcore_id); 1558 else 1559 socketid = 0; 1560 1561 printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); 1562 fflush(stdout); 1563 1564 rte_eth_dev_info_get(portid, &dev_info); 1565 txconf = &dev_info.default_txconf; 1566 if (port_conf.rxmode.jumbo_frame) 1567 txconf->txq_flags = 0; 1568 ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, 1569 socketid, txconf); 1570 if (ret < 0) 1571 rte_exit(EXIT_FAILURE, 1572 "rte_eth_tx_queue_setup: err=%d, " 1573 "port=%d\n", ret, portid); 1574 1575 qconf = &lcore_conf[lcore_id]; 1576 qconf->tx_queue_id[portid] = queueid; 1577 queueid++; 1578 } 1579 printf("\n"); 1580 } 1581 1582 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 1583 if (rte_lcore_is_enabled(lcore_id) == 0) 1584 continue; 1585 1586 /* init power management library */ 1587 ret = rte_power_init(lcore_id); 1588 if (ret) 1589 rte_exit(EXIT_FAILURE, "Power management library " 1590 "initialization failed on core%u\n", lcore_id); 1591 1592 /* init timer structures for each enabled lcore */ 1593 rte_timer_init(&power_timers[lcore_id]); 1594 hz = rte_get_timer_hz(); 1595 rte_timer_reset(&power_timers[lcore_id], 1596 hz/TIMER_NUMBER_PER_SECOND, SINGLE, lcore_id, 1597 power_timer_cb, NULL); 1598 1599 qconf = &lcore_conf[lcore_id]; 1600 printf("\nInitializing rx queues on lcore %u ... ", lcore_id ); 1601 fflush(stdout); 1602 /* init RX queues */ 1603 for(queue = 0; queue < qconf->n_rx_queue; ++queue) { 1604 portid = qconf->rx_queue_list[queue].port_id; 1605 queueid = qconf->rx_queue_list[queue].queue_id; 1606 1607 if (numa_on) 1608 socketid = \ 1609 (uint8_t)rte_lcore_to_socket_id(lcore_id); 1610 else 1611 socketid = 0; 1612 1613 printf("rxq=%d,%d,%d ", portid, queueid, socketid); 1614 fflush(stdout); 1615 1616 ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, 1617 socketid, NULL, 1618 pktmbuf_pool[socketid]); 1619 if (ret < 0) 1620 rte_exit(EXIT_FAILURE, 1621 "rte_eth_rx_queue_setup: err=%d, " 1622 "port=%d\n", ret, portid); 1623 } 1624 } 1625 1626 printf("\n"); 1627 1628 /* start ports */ 1629 for (portid = 0; portid < nb_ports; portid++) { 1630 if ((enabled_port_mask & (1 << portid)) == 0) { 1631 continue; 1632 } 1633 /* Start device */ 1634 ret = rte_eth_dev_start(portid); 1635 if (ret < 0) 1636 rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, " 1637 "port=%d\n", ret, portid); 1638 1639 /* 1640 * If enabled, put device in promiscuous mode. 1641 * This allows IO forwarding mode to forward packets 1642 * to itself through 2 cross-connected ports of the 1643 * target machine. 1644 */ 1645 if (promiscuous_on) 1646 rte_eth_promiscuous_enable(portid); 1647 } 1648 1649 check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask); 1650 1651 /* launch per-lcore init on every lcore */ 1652 rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); 1653 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 1654 if (rte_eal_wait_lcore(lcore_id) < 0) 1655 return -1; 1656 } 1657 1658 return 0; 1659 } 1660