1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdint.h> 8 #include <inttypes.h> 9 #include <sys/types.h> 10 #include <string.h> 11 #include <sys/queue.h> 12 #include <stdarg.h> 13 #include <errno.h> 14 #include <getopt.h> 15 #include <unistd.h> 16 #include <signal.h> 17 18 #include <rte_common.h> 19 #include <rte_byteorder.h> 20 #include <rte_log.h> 21 #include <rte_malloc.h> 22 #include <rte_memory.h> 23 #include <rte_memcpy.h> 24 #include <rte_eal.h> 25 #include <rte_launch.h> 26 #include <rte_atomic.h> 27 #include <rte_cycles.h> 28 #include <rte_prefetch.h> 29 #include <rte_lcore.h> 30 #include <rte_per_lcore.h> 31 #include <rte_branch_prediction.h> 32 #include <rte_interrupts.h> 33 #include <rte_random.h> 34 #include <rte_debug.h> 35 #include <rte_ether.h> 36 #include <rte_ethdev.h> 37 #include <rte_mempool.h> 38 #include <rte_mbuf.h> 39 #include <rte_ip.h> 40 #include <rte_tcp.h> 41 #include <rte_udp.h> 42 #include <rte_string_fns.h> 43 #include <rte_timer.h> 44 #include <rte_power.h> 45 #include <rte_spinlock.h> 46 #include <rte_power_empty_poll.h> 47 48 #include "perf_core.h" 49 #include "main.h" 50 51 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 52 53 #define MAX_PKT_BURST 32 54 55 #define MIN_ZERO_POLL_COUNT 10 56 57 /* 100 ms interval */ 58 #define TIMER_NUMBER_PER_SECOND 10 59 /* (10ms) */ 60 #define INTERVALS_PER_SECOND 100 61 /* 100000 us */ 62 #define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND) 63 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25 64 65 #define APP_LOOKUP_EXACT_MATCH 0 66 #define APP_LOOKUP_LPM 1 67 #define DO_RFC_1812_CHECKS 68 69 #ifndef APP_LOOKUP_METHOD 70 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 71 #endif 72 73 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 74 #include <rte_hash.h> 75 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 76 #include <rte_lpm.h> 77 #else 78 #error "APP_LOOKUP_METHOD set to incorrect value" 79 #endif 80 81 #ifndef IPv6_BYTES 82 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ 83 "%02x%02x:%02x%02x:%02x%02x:%02x%02x" 84 #define IPv6_BYTES(addr) \ 85 addr[0], addr[1], addr[2], addr[3], \ 86 addr[4], addr[5], addr[6], addr[7], \ 87 addr[8], addr[9], addr[10], addr[11],\ 88 addr[12], addr[13],addr[14], addr[15] 89 #endif 90 91 #define MAX_JUMBO_PKT_LEN 9600 92 93 #define IPV6_ADDR_LEN 16 94 95 #define MEMPOOL_CACHE_SIZE 256 96 97 /* 98 * This expression is used to calculate the number of mbufs needed depending on 99 * user input, taking into account memory for rx and tx hardware rings, cache 100 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 101 * NB_MBUF never goes below a minimum value of 8192. 102 */ 103 104 #define NB_MBUF RTE_MAX ( \ 105 (nb_ports*nb_rx_queue*nb_rxd + \ 106 nb_ports*nb_lcores*MAX_PKT_BURST + \ 107 nb_ports*n_tx_queue*nb_txd + \ 108 nb_lcores*MEMPOOL_CACHE_SIZE), \ 109 (unsigned)8192) 110 111 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 112 113 #define NB_SOCKETS 8 114 115 /* Configure how many packets ahead to prefetch, when reading packets */ 116 #define PREFETCH_OFFSET 3 117 118 /* 119 * Configurable number of RX/TX ring descriptors 120 */ 121 #define RTE_TEST_RX_DESC_DEFAULT 1024 122 #define RTE_TEST_TX_DESC_DEFAULT 1024 123 124 /* 125 * These two thresholds were decided on by running the training algorithm on 126 * a 2.5GHz Xeon. These defaults can be overridden by supplying non-zero values 127 * for the med_threshold and high_threshold parameters on the command line. 128 */ 129 #define EMPTY_POLL_MED_THRESHOLD 350000UL 130 #define EMPTY_POLL_HGH_THRESHOLD 580000UL 131 132 133 134 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 135 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 136 137 /* ethernet addresses of ports */ 138 static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 139 140 /* ethernet addresses of ports */ 141 static rte_spinlock_t locks[RTE_MAX_ETHPORTS]; 142 143 /* mask of enabled ports */ 144 static uint32_t enabled_port_mask = 0; 145 /* Ports set in promiscuous mode off by default. */ 146 static int promiscuous_on = 0; 147 /* NUMA is enabled by default. */ 148 static int numa_on = 1; 149 /* emptypoll is disabled by default. */ 150 static bool empty_poll_on; 151 static bool empty_poll_train; 152 volatile bool empty_poll_stop; 153 static struct ep_params *ep_params; 154 static struct ep_policy policy; 155 static long ep_med_edpi, ep_hgh_edpi; 156 157 static int parse_ptype; /**< Parse packet type using rx callback, and */ 158 /**< disabled by default */ 159 160 enum freq_scale_hint_t 161 { 162 FREQ_LOWER = -1, 163 FREQ_CURRENT = 0, 164 FREQ_HIGHER = 1, 165 FREQ_HIGHEST = 2 166 }; 167 168 struct lcore_rx_queue { 169 uint16_t port_id; 170 uint8_t queue_id; 171 enum freq_scale_hint_t freq_up_hint; 172 uint32_t zero_rx_packet_count; 173 uint32_t idle_hint; 174 } __rte_cache_aligned; 175 176 #define MAX_RX_QUEUE_PER_LCORE 16 177 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 178 #define MAX_RX_QUEUE_PER_PORT 128 179 180 #define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 181 182 183 struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; 184 static struct lcore_params lcore_params_array_default[] = { 185 {0, 0, 2}, 186 {0, 1, 2}, 187 {0, 2, 2}, 188 {1, 0, 2}, 189 {1, 1, 2}, 190 {1, 2, 2}, 191 {2, 0, 2}, 192 {3, 0, 3}, 193 {3, 1, 3}, 194 }; 195 196 struct lcore_params *lcore_params = lcore_params_array_default; 197 uint16_t nb_lcore_params = sizeof(lcore_params_array_default) / 198 sizeof(lcore_params_array_default[0]); 199 200 static struct rte_eth_conf port_conf = { 201 .rxmode = { 202 .mq_mode = ETH_MQ_RX_RSS, 203 .max_rx_pkt_len = RTE_ETHER_MAX_LEN, 204 .split_hdr_size = 0, 205 .offloads = DEV_RX_OFFLOAD_CHECKSUM, 206 }, 207 .rx_adv_conf = { 208 .rss_conf = { 209 .rss_key = NULL, 210 .rss_hf = ETH_RSS_UDP, 211 }, 212 }, 213 .txmode = { 214 .mq_mode = ETH_MQ_TX_NONE, 215 }, 216 .intr_conf = { 217 .rxq = 1, 218 }, 219 }; 220 221 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; 222 223 224 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 225 226 #ifdef RTE_ARCH_X86 227 #include <rte_hash_crc.h> 228 #define DEFAULT_HASH_FUNC rte_hash_crc 229 #else 230 #include <rte_jhash.h> 231 #define DEFAULT_HASH_FUNC rte_jhash 232 #endif 233 234 struct ipv4_5tuple { 235 uint32_t ip_dst; 236 uint32_t ip_src; 237 uint16_t port_dst; 238 uint16_t port_src; 239 uint8_t proto; 240 } __attribute__((__packed__)); 241 242 struct ipv6_5tuple { 243 uint8_t ip_dst[IPV6_ADDR_LEN]; 244 uint8_t ip_src[IPV6_ADDR_LEN]; 245 uint16_t port_dst; 246 uint16_t port_src; 247 uint8_t proto; 248 } __attribute__((__packed__)); 249 250 struct ipv4_l3fwd_route { 251 struct ipv4_5tuple key; 252 uint8_t if_out; 253 }; 254 255 struct ipv6_l3fwd_route { 256 struct ipv6_5tuple key; 257 uint8_t if_out; 258 }; 259 260 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 261 {{RTE_IPv4(100,10,0,1), RTE_IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, 262 {{RTE_IPv4(100,20,0,2), RTE_IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, 263 {{RTE_IPv4(100,30,0,3), RTE_IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, 264 {{RTE_IPv4(100,40,0,4), RTE_IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, 265 }; 266 267 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 268 { 269 { 270 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 271 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, 272 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 273 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, 274 1, 10, IPPROTO_UDP 275 }, 4 276 }, 277 }; 278 279 typedef struct rte_hash lookup_struct_t; 280 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 281 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 282 283 #define L3FWD_HASH_ENTRIES 1024 284 285 #define IPV4_L3FWD_NUM_ROUTES \ 286 (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0])) 287 288 #define IPV6_L3FWD_NUM_ROUTES \ 289 (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0])) 290 291 static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 292 static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 293 #endif 294 295 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 296 struct ipv4_l3fwd_route { 297 uint32_t ip; 298 uint8_t depth; 299 uint8_t if_out; 300 }; 301 302 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 303 {RTE_IPv4(1,1,1,0), 24, 0}, 304 {RTE_IPv4(2,1,1,0), 24, 1}, 305 {RTE_IPv4(3,1,1,0), 24, 2}, 306 {RTE_IPv4(4,1,1,0), 24, 3}, 307 {RTE_IPv4(5,1,1,0), 24, 4}, 308 {RTE_IPv4(6,1,1,0), 24, 5}, 309 {RTE_IPv4(7,1,1,0), 24, 6}, 310 {RTE_IPv4(8,1,1,0), 24, 7}, 311 }; 312 313 #define IPV4_L3FWD_NUM_ROUTES \ 314 (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0])) 315 316 #define IPV4_L3FWD_LPM_MAX_RULES 1024 317 318 typedef struct rte_lpm lookup_struct_t; 319 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 320 #endif 321 322 struct lcore_conf { 323 uint16_t n_rx_queue; 324 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 325 uint16_t n_tx_port; 326 uint16_t tx_port_id[RTE_MAX_ETHPORTS]; 327 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 328 struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; 329 lookup_struct_t * ipv4_lookup_struct; 330 lookup_struct_t * ipv6_lookup_struct; 331 } __rte_cache_aligned; 332 333 struct lcore_stats { 334 /* total sleep time in ms since last frequency scaling down */ 335 uint32_t sleep_time; 336 /* number of long sleep recently */ 337 uint32_t nb_long_sleep; 338 /* freq. scaling up trend */ 339 uint32_t trend; 340 /* total packet processed recently */ 341 uint64_t nb_rx_processed; 342 /* total iterations looped recently */ 343 uint64_t nb_iteration_looped; 344 uint32_t padding[9]; 345 } __rte_cache_aligned; 346 347 static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned; 348 static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned; 349 static struct rte_timer power_timers[RTE_MAX_LCORE]; 350 351 static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count); 352 static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \ 353 unsigned int lcore_id, uint16_t port_id, uint16_t queue_id); 354 355 356 /* 357 * These defaults are using the max frequency index (1), a medium index (9) 358 * and a typical low frequency index (14). These can be adjusted to use 359 * different indexes using the relevant command line parameters. 360 */ 361 static uint8_t freq_tlb[] = {14, 9, 1}; 362 363 static int is_done(void) 364 { 365 return empty_poll_stop; 366 } 367 368 /* exit signal handler */ 369 static void 370 signal_exit_now(int sigtype) 371 { 372 unsigned lcore_id; 373 unsigned int portid; 374 int ret; 375 376 if (sigtype == SIGINT) { 377 if (empty_poll_on) 378 empty_poll_stop = true; 379 380 381 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 382 if (rte_lcore_is_enabled(lcore_id) == 0) 383 continue; 384 385 /* init power management library */ 386 ret = rte_power_exit(lcore_id); 387 if (ret) 388 rte_exit(EXIT_FAILURE, "Power management " 389 "library de-initialization failed on " 390 "core%u\n", lcore_id); 391 } 392 393 if (!empty_poll_on) { 394 RTE_ETH_FOREACH_DEV(portid) { 395 if ((enabled_port_mask & (1 << portid)) == 0) 396 continue; 397 398 rte_eth_dev_stop(portid); 399 rte_eth_dev_close(portid); 400 } 401 } 402 } 403 404 if (!empty_poll_on) 405 rte_exit(EXIT_SUCCESS, "User forced exit\n"); 406 } 407 408 /* Freqency scale down timer callback */ 409 static void 410 power_timer_cb(__attribute__((unused)) struct rte_timer *tim, 411 __attribute__((unused)) void *arg) 412 { 413 uint64_t hz; 414 float sleep_time_ratio; 415 unsigned lcore_id = rte_lcore_id(); 416 417 /* accumulate total execution time in us when callback is invoked */ 418 sleep_time_ratio = (float)(stats[lcore_id].sleep_time) / 419 (float)SCALING_PERIOD; 420 /** 421 * check whether need to scale down frequency a step if it sleep a lot. 422 */ 423 if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) { 424 if (rte_power_freq_down) 425 rte_power_freq_down(lcore_id); 426 } 427 else if ( (unsigned)(stats[lcore_id].nb_rx_processed / 428 stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) { 429 /** 430 * scale down a step if average packet per iteration less 431 * than expectation. 432 */ 433 if (rte_power_freq_down) 434 rte_power_freq_down(lcore_id); 435 } 436 437 /** 438 * initialize another timer according to current frequency to ensure 439 * timer interval is relatively fixed. 440 */ 441 hz = rte_get_timer_hz(); 442 rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND, 443 SINGLE, lcore_id, power_timer_cb, NULL); 444 445 stats[lcore_id].nb_rx_processed = 0; 446 stats[lcore_id].nb_iteration_looped = 0; 447 448 stats[lcore_id].sleep_time = 0; 449 } 450 451 /* Enqueue a single packet, and send burst if queue is filled */ 452 static inline int 453 send_single_packet(struct rte_mbuf *m, uint16_t port) 454 { 455 uint32_t lcore_id; 456 struct lcore_conf *qconf; 457 458 lcore_id = rte_lcore_id(); 459 qconf = &lcore_conf[lcore_id]; 460 461 rte_eth_tx_buffer(port, qconf->tx_queue_id[port], 462 qconf->tx_buffer[port], m); 463 464 return 0; 465 } 466 467 #ifdef DO_RFC_1812_CHECKS 468 static inline int 469 is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len) 470 { 471 /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ 472 /* 473 * 1. The packet length reported by the Link Layer must be large 474 * enough to hold the minimum length legal IP datagram (20 bytes). 475 */ 476 if (link_len < sizeof(struct rte_ipv4_hdr)) 477 return -1; 478 479 /* 2. The IP checksum must be correct. */ 480 /* this is checked in H/W */ 481 482 /* 483 * 3. The IP version number must be 4. If the version number is not 4 484 * then the packet may be another version of IP, such as IPng or 485 * ST-II. 486 */ 487 if (((pkt->version_ihl) >> 4) != 4) 488 return -3; 489 /* 490 * 4. The IP header length field must be large enough to hold the 491 * minimum length legal IP datagram (20 bytes = 5 words). 492 */ 493 if ((pkt->version_ihl & 0xf) < 5) 494 return -4; 495 496 /* 497 * 5. The IP total length field must be large enough to hold the IP 498 * datagram header, whose length is specified in the IP header length 499 * field. 500 */ 501 if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr)) 502 return -5; 503 504 return 0; 505 } 506 #endif 507 508 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 509 static void 510 print_ipv4_key(struct ipv4_5tuple key) 511 { 512 printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, " 513 "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src, 514 key.port_dst, key.port_src, key.proto); 515 } 516 static void 517 print_ipv6_key(struct ipv6_5tuple key) 518 { 519 printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", " 520 "port dst = %d, port src = %d, proto = %d\n", 521 IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src), 522 key.port_dst, key.port_src, key.proto); 523 } 524 525 static inline uint16_t 526 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 527 lookup_struct_t * ipv4_l3fwd_lookup_struct) 528 { 529 struct ipv4_5tuple key; 530 struct rte_tcp_hdr *tcp; 531 struct rte_udp_hdr *udp; 532 int ret = 0; 533 534 key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); 535 key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); 536 key.proto = ipv4_hdr->next_proto_id; 537 538 switch (ipv4_hdr->next_proto_id) { 539 case IPPROTO_TCP: 540 tcp = (struct rte_tcp_hdr *)((unsigned char *)ipv4_hdr + 541 sizeof(struct rte_ipv4_hdr)); 542 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 543 key.port_src = rte_be_to_cpu_16(tcp->src_port); 544 break; 545 546 case IPPROTO_UDP: 547 udp = (struct rte_udp_hdr *)((unsigned char *)ipv4_hdr + 548 sizeof(struct rte_ipv4_hdr)); 549 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 550 key.port_src = rte_be_to_cpu_16(udp->src_port); 551 break; 552 553 default: 554 key.port_dst = 0; 555 key.port_src = 0; 556 break; 557 } 558 559 /* Find destination port */ 560 ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); 561 return ((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]); 562 } 563 564 static inline uint16_t 565 get_ipv6_dst_port(struct rte_ipv6_hdr *ipv6_hdr, uint16_t portid, 566 lookup_struct_t *ipv6_l3fwd_lookup_struct) 567 { 568 struct ipv6_5tuple key; 569 struct rte_tcp_hdr *tcp; 570 struct rte_udp_hdr *udp; 571 int ret = 0; 572 573 memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); 574 memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); 575 576 key.proto = ipv6_hdr->proto; 577 578 switch (ipv6_hdr->proto) { 579 case IPPROTO_TCP: 580 tcp = (struct rte_tcp_hdr *)((unsigned char *) ipv6_hdr + 581 sizeof(struct rte_ipv6_hdr)); 582 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 583 key.port_src = rte_be_to_cpu_16(tcp->src_port); 584 break; 585 586 case IPPROTO_UDP: 587 udp = (struct rte_udp_hdr *)((unsigned char *) ipv6_hdr + 588 sizeof(struct rte_ipv6_hdr)); 589 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 590 key.port_src = rte_be_to_cpu_16(udp->src_port); 591 break; 592 593 default: 594 key.port_dst = 0; 595 key.port_src = 0; 596 break; 597 } 598 599 /* Find destination port */ 600 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 601 return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); 602 } 603 #endif 604 605 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 606 static inline uint16_t 607 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 608 lookup_struct_t *ipv4_l3fwd_lookup_struct) 609 { 610 uint32_t next_hop; 611 612 return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, 613 rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? 614 next_hop : portid); 615 } 616 #endif 617 618 static inline void 619 parse_ptype_one(struct rte_mbuf *m) 620 { 621 struct rte_ether_hdr *eth_hdr; 622 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 623 uint16_t ether_type; 624 625 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 626 ether_type = eth_hdr->ether_type; 627 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv4)) 628 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 629 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv6)) 630 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 631 632 m->packet_type = packet_type; 633 } 634 635 static uint16_t 636 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused, 637 struct rte_mbuf *pkts[], uint16_t nb_pkts, 638 uint16_t max_pkts __rte_unused, 639 void *user_param __rte_unused) 640 { 641 unsigned int i; 642 643 for (i = 0; i < nb_pkts; ++i) 644 parse_ptype_one(pkts[i]); 645 646 return nb_pkts; 647 } 648 649 static int 650 add_cb_parse_ptype(uint16_t portid, uint16_t queueid) 651 { 652 printf("Port %d: softly parse packet type info\n", portid); 653 if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL)) 654 return 0; 655 656 printf("Failed to add rx callback: port=%d\n", portid); 657 return -1; 658 } 659 660 static inline void 661 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid, 662 struct lcore_conf *qconf) 663 { 664 struct rte_ether_hdr *eth_hdr; 665 struct rte_ipv4_hdr *ipv4_hdr; 666 void *d_addr_bytes; 667 uint16_t dst_port; 668 669 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 670 671 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 672 /* Handle IPv4 headers.*/ 673 ipv4_hdr = 674 rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 675 sizeof(struct rte_ether_hdr)); 676 677 #ifdef DO_RFC_1812_CHECKS 678 /* Check to make sure the packet is valid (RFC1812) */ 679 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { 680 rte_pktmbuf_free(m); 681 return; 682 } 683 #endif 684 685 dst_port = get_ipv4_dst_port(ipv4_hdr, portid, 686 qconf->ipv4_lookup_struct); 687 if (dst_port >= RTE_MAX_ETHPORTS || 688 (enabled_port_mask & 1 << dst_port) == 0) 689 dst_port = portid; 690 691 /* 02:00:00:00:00:xx */ 692 d_addr_bytes = ð_hdr->d_addr.addr_bytes[0]; 693 *((uint64_t *)d_addr_bytes) = 694 0x000000000002 + ((uint64_t)dst_port << 40); 695 696 #ifdef DO_RFC_1812_CHECKS 697 /* Update time to live and header checksum */ 698 --(ipv4_hdr->time_to_live); 699 ++(ipv4_hdr->hdr_checksum); 700 #endif 701 702 /* src addr */ 703 rte_ether_addr_copy(&ports_eth_addr[dst_port], 704 ð_hdr->s_addr); 705 706 send_single_packet(m, dst_port); 707 } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { 708 /* Handle IPv6 headers.*/ 709 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 710 struct rte_ipv6_hdr *ipv6_hdr; 711 712 ipv6_hdr = 713 rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 714 sizeof(struct rte_ether_hdr)); 715 716 dst_port = get_ipv6_dst_port(ipv6_hdr, portid, 717 qconf->ipv6_lookup_struct); 718 719 if (dst_port >= RTE_MAX_ETHPORTS || 720 (enabled_port_mask & 1 << dst_port) == 0) 721 dst_port = portid; 722 723 /* 02:00:00:00:00:xx */ 724 d_addr_bytes = ð_hdr->d_addr.addr_bytes[0]; 725 *((uint64_t *)d_addr_bytes) = 726 0x000000000002 + ((uint64_t)dst_port << 40); 727 728 /* src addr */ 729 rte_ether_addr_copy(&ports_eth_addr[dst_port], 730 ð_hdr->s_addr); 731 732 send_single_packet(m, dst_port); 733 #else 734 /* We don't currently handle IPv6 packets in LPM mode. */ 735 rte_pktmbuf_free(m); 736 #endif 737 } else 738 rte_pktmbuf_free(m); 739 740 } 741 742 #define MINIMUM_SLEEP_TIME 1 743 #define SUSPEND_THRESHOLD 300 744 745 static inline uint32_t 746 power_idle_heuristic(uint32_t zero_rx_packet_count) 747 { 748 /* If zero count is less than 100, sleep 1us */ 749 if (zero_rx_packet_count < SUSPEND_THRESHOLD) 750 return MINIMUM_SLEEP_TIME; 751 /* If zero count is less than 1000, sleep 100 us which is the 752 minimum latency switching from C3/C6 to C0 753 */ 754 else 755 return SUSPEND_THRESHOLD; 756 } 757 758 static inline enum freq_scale_hint_t 759 power_freq_scaleup_heuristic(unsigned lcore_id, 760 uint16_t port_id, 761 uint16_t queue_id) 762 { 763 uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id); 764 /** 765 * HW Rx queue size is 128 by default, Rx burst read at maximum 32 entries 766 * per iteration 767 */ 768 #define FREQ_GEAR1_RX_PACKET_THRESHOLD MAX_PKT_BURST 769 #define FREQ_GEAR2_RX_PACKET_THRESHOLD (MAX_PKT_BURST*2) 770 #define FREQ_GEAR3_RX_PACKET_THRESHOLD (MAX_PKT_BURST*3) 771 #define FREQ_UP_TREND1_ACC 1 772 #define FREQ_UP_TREND2_ACC 100 773 #define FREQ_UP_THRESHOLD 10000 774 775 if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) { 776 stats[lcore_id].trend = 0; 777 return FREQ_HIGHEST; 778 } else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD)) 779 stats[lcore_id].trend += FREQ_UP_TREND2_ACC; 780 else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD)) 781 stats[lcore_id].trend += FREQ_UP_TREND1_ACC; 782 783 if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) { 784 stats[lcore_id].trend = 0; 785 return FREQ_HIGHER; 786 } 787 788 return FREQ_CURRENT; 789 } 790 791 /** 792 * force polling thread sleep until one-shot rx interrupt triggers 793 * @param port_id 794 * Port id. 795 * @param queue_id 796 * Rx queue id. 797 * @return 798 * 0 on success 799 */ 800 static int 801 sleep_until_rx_interrupt(int num) 802 { 803 struct rte_epoll_event event[num]; 804 int n, i; 805 uint16_t port_id; 806 uint8_t queue_id; 807 void *data; 808 809 RTE_LOG(INFO, L3FWD_POWER, 810 "lcore %u sleeps until interrupt triggers\n", 811 rte_lcore_id()); 812 813 n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, -1); 814 for (i = 0; i < n; i++) { 815 data = event[i].epdata.data; 816 port_id = ((uintptr_t)data) >> CHAR_BIT; 817 queue_id = ((uintptr_t)data) & 818 RTE_LEN2MASK(CHAR_BIT, uint8_t); 819 rte_eth_dev_rx_intr_disable(port_id, queue_id); 820 RTE_LOG(INFO, L3FWD_POWER, 821 "lcore %u is waked up from rx interrupt on" 822 " port %d queue %d\n", 823 rte_lcore_id(), port_id, queue_id); 824 } 825 826 return 0; 827 } 828 829 static void turn_on_intr(struct lcore_conf *qconf) 830 { 831 int i; 832 struct lcore_rx_queue *rx_queue; 833 uint8_t queue_id; 834 uint16_t port_id; 835 836 for (i = 0; i < qconf->n_rx_queue; ++i) { 837 rx_queue = &(qconf->rx_queue_list[i]); 838 port_id = rx_queue->port_id; 839 queue_id = rx_queue->queue_id; 840 841 rte_spinlock_lock(&(locks[port_id])); 842 rte_eth_dev_rx_intr_enable(port_id, queue_id); 843 rte_spinlock_unlock(&(locks[port_id])); 844 } 845 } 846 847 static int event_register(struct lcore_conf *qconf) 848 { 849 struct lcore_rx_queue *rx_queue; 850 uint8_t queueid; 851 uint16_t portid; 852 uint32_t data; 853 int ret; 854 int i; 855 856 for (i = 0; i < qconf->n_rx_queue; ++i) { 857 rx_queue = &(qconf->rx_queue_list[i]); 858 portid = rx_queue->port_id; 859 queueid = rx_queue->queue_id; 860 data = portid << CHAR_BIT | queueid; 861 862 ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid, 863 RTE_EPOLL_PER_THREAD, 864 RTE_INTR_EVENT_ADD, 865 (void *)((uintptr_t)data)); 866 if (ret) 867 return ret; 868 } 869 870 return 0; 871 } 872 /* main processing loop */ 873 static int 874 main_empty_poll_loop(__attribute__((unused)) void *dummy) 875 { 876 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 877 unsigned int lcore_id; 878 uint64_t prev_tsc, diff_tsc, cur_tsc; 879 int i, j, nb_rx; 880 uint8_t queueid; 881 uint16_t portid; 882 struct lcore_conf *qconf; 883 struct lcore_rx_queue *rx_queue; 884 885 const uint64_t drain_tsc = 886 (rte_get_tsc_hz() + US_PER_S - 1) / 887 US_PER_S * BURST_TX_DRAIN_US; 888 889 prev_tsc = 0; 890 891 lcore_id = rte_lcore_id(); 892 qconf = &lcore_conf[lcore_id]; 893 894 if (qconf->n_rx_queue == 0) { 895 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 896 lcore_id); 897 return 0; 898 } 899 900 for (i = 0; i < qconf->n_rx_queue; i++) { 901 portid = qconf->rx_queue_list[i].port_id; 902 queueid = qconf->rx_queue_list[i].queue_id; 903 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 904 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 905 } 906 907 while (!is_done()) { 908 stats[lcore_id].nb_iteration_looped++; 909 910 cur_tsc = rte_rdtsc(); 911 /* 912 * TX burst queue drain 913 */ 914 diff_tsc = cur_tsc - prev_tsc; 915 if (unlikely(diff_tsc > drain_tsc)) { 916 for (i = 0; i < qconf->n_tx_port; ++i) { 917 portid = qconf->tx_port_id[i]; 918 rte_eth_tx_buffer_flush(portid, 919 qconf->tx_queue_id[portid], 920 qconf->tx_buffer[portid]); 921 } 922 prev_tsc = cur_tsc; 923 } 924 925 /* 926 * Read packet from RX queues 927 */ 928 for (i = 0; i < qconf->n_rx_queue; ++i) { 929 rx_queue = &(qconf->rx_queue_list[i]); 930 rx_queue->idle_hint = 0; 931 portid = rx_queue->port_id; 932 queueid = rx_queue->queue_id; 933 934 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 935 MAX_PKT_BURST); 936 937 stats[lcore_id].nb_rx_processed += nb_rx; 938 939 if (nb_rx == 0) { 940 941 rte_power_empty_poll_stat_update(lcore_id); 942 943 continue; 944 } else { 945 rte_power_poll_stat_update(lcore_id, nb_rx); 946 } 947 948 949 /* Prefetch first packets */ 950 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 951 rte_prefetch0(rte_pktmbuf_mtod( 952 pkts_burst[j], void *)); 953 } 954 955 /* Prefetch and forward already prefetched packets */ 956 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 957 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 958 j + PREFETCH_OFFSET], 959 void *)); 960 l3fwd_simple_forward(pkts_burst[j], portid, 961 qconf); 962 } 963 964 /* Forward remaining prefetched packets */ 965 for (; j < nb_rx; j++) { 966 l3fwd_simple_forward(pkts_burst[j], portid, 967 qconf); 968 } 969 970 } 971 972 } 973 974 return 0; 975 } 976 /* main processing loop */ 977 static int 978 main_loop(__attribute__((unused)) void *dummy) 979 { 980 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 981 unsigned lcore_id; 982 uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz; 983 uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power; 984 int i, j, nb_rx; 985 uint8_t queueid; 986 uint16_t portid; 987 struct lcore_conf *qconf; 988 struct lcore_rx_queue *rx_queue; 989 enum freq_scale_hint_t lcore_scaleup_hint; 990 uint32_t lcore_rx_idle_count = 0; 991 uint32_t lcore_idle_hint = 0; 992 int intr_en = 0; 993 994 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; 995 996 prev_tsc = 0; 997 hz = rte_get_timer_hz(); 998 tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND; 999 1000 lcore_id = rte_lcore_id(); 1001 qconf = &lcore_conf[lcore_id]; 1002 1003 if (qconf->n_rx_queue == 0) { 1004 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); 1005 return 0; 1006 } 1007 1008 RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id); 1009 1010 for (i = 0; i < qconf->n_rx_queue; i++) { 1011 portid = qconf->rx_queue_list[i].port_id; 1012 queueid = qconf->rx_queue_list[i].queue_id; 1013 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1014 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1015 } 1016 1017 /* add into event wait list */ 1018 if (event_register(qconf) == 0) 1019 intr_en = 1; 1020 else 1021 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 1022 1023 while (1) { 1024 stats[lcore_id].nb_iteration_looped++; 1025 1026 cur_tsc = rte_rdtsc(); 1027 cur_tsc_power = cur_tsc; 1028 1029 /* 1030 * TX burst queue drain 1031 */ 1032 diff_tsc = cur_tsc - prev_tsc; 1033 if (unlikely(diff_tsc > drain_tsc)) { 1034 for (i = 0; i < qconf->n_tx_port; ++i) { 1035 portid = qconf->tx_port_id[i]; 1036 rte_eth_tx_buffer_flush(portid, 1037 qconf->tx_queue_id[portid], 1038 qconf->tx_buffer[portid]); 1039 } 1040 prev_tsc = cur_tsc; 1041 } 1042 1043 diff_tsc_power = cur_tsc_power - prev_tsc_power; 1044 if (diff_tsc_power > tim_res_tsc) { 1045 rte_timer_manage(); 1046 prev_tsc_power = cur_tsc_power; 1047 } 1048 1049 start_rx: 1050 /* 1051 * Read packet from RX queues 1052 */ 1053 lcore_scaleup_hint = FREQ_CURRENT; 1054 lcore_rx_idle_count = 0; 1055 for (i = 0; i < qconf->n_rx_queue; ++i) { 1056 rx_queue = &(qconf->rx_queue_list[i]); 1057 rx_queue->idle_hint = 0; 1058 portid = rx_queue->port_id; 1059 queueid = rx_queue->queue_id; 1060 1061 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1062 MAX_PKT_BURST); 1063 1064 stats[lcore_id].nb_rx_processed += nb_rx; 1065 if (unlikely(nb_rx == 0)) { 1066 /** 1067 * no packet received from rx queue, try to 1068 * sleep for a while forcing CPU enter deeper 1069 * C states. 1070 */ 1071 rx_queue->zero_rx_packet_count++; 1072 1073 if (rx_queue->zero_rx_packet_count <= 1074 MIN_ZERO_POLL_COUNT) 1075 continue; 1076 1077 rx_queue->idle_hint = power_idle_heuristic(\ 1078 rx_queue->zero_rx_packet_count); 1079 lcore_rx_idle_count++; 1080 } else { 1081 rx_queue->zero_rx_packet_count = 0; 1082 1083 /** 1084 * do not scale up frequency immediately as 1085 * user to kernel space communication is costly 1086 * which might impact packet I/O for received 1087 * packets. 1088 */ 1089 rx_queue->freq_up_hint = 1090 power_freq_scaleup_heuristic(lcore_id, 1091 portid, queueid); 1092 } 1093 1094 /* Prefetch first packets */ 1095 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1096 rte_prefetch0(rte_pktmbuf_mtod( 1097 pkts_burst[j], void *)); 1098 } 1099 1100 /* Prefetch and forward already prefetched packets */ 1101 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1102 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1103 j + PREFETCH_OFFSET], void *)); 1104 l3fwd_simple_forward(pkts_burst[j], portid, 1105 qconf); 1106 } 1107 1108 /* Forward remaining prefetched packets */ 1109 for (; j < nb_rx; j++) { 1110 l3fwd_simple_forward(pkts_burst[j], portid, 1111 qconf); 1112 } 1113 } 1114 1115 if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) { 1116 for (i = 1, lcore_scaleup_hint = 1117 qconf->rx_queue_list[0].freq_up_hint; 1118 i < qconf->n_rx_queue; ++i) { 1119 rx_queue = &(qconf->rx_queue_list[i]); 1120 if (rx_queue->freq_up_hint > 1121 lcore_scaleup_hint) 1122 lcore_scaleup_hint = 1123 rx_queue->freq_up_hint; 1124 } 1125 1126 if (lcore_scaleup_hint == FREQ_HIGHEST) { 1127 if (rte_power_freq_max) 1128 rte_power_freq_max(lcore_id); 1129 } else if (lcore_scaleup_hint == FREQ_HIGHER) { 1130 if (rte_power_freq_up) 1131 rte_power_freq_up(lcore_id); 1132 } 1133 } else { 1134 /** 1135 * All Rx queues empty in recent consecutive polls, 1136 * sleep in a conservative manner, meaning sleep as 1137 * less as possible. 1138 */ 1139 for (i = 1, lcore_idle_hint = 1140 qconf->rx_queue_list[0].idle_hint; 1141 i < qconf->n_rx_queue; ++i) { 1142 rx_queue = &(qconf->rx_queue_list[i]); 1143 if (rx_queue->idle_hint < lcore_idle_hint) 1144 lcore_idle_hint = rx_queue->idle_hint; 1145 } 1146 1147 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1148 /** 1149 * execute "pause" instruction to avoid context 1150 * switch which generally take hundred of 1151 * microseconds for short sleep. 1152 */ 1153 rte_delay_us(lcore_idle_hint); 1154 else { 1155 /* suspend until rx interrupt trigges */ 1156 if (intr_en) { 1157 turn_on_intr(qconf); 1158 sleep_until_rx_interrupt( 1159 qconf->n_rx_queue); 1160 /** 1161 * start receiving packets immediately 1162 */ 1163 goto start_rx; 1164 } 1165 } 1166 stats[lcore_id].sleep_time += lcore_idle_hint; 1167 } 1168 } 1169 } 1170 1171 static int 1172 check_lcore_params(void) 1173 { 1174 uint8_t queue, lcore; 1175 uint16_t i; 1176 int socketid; 1177 1178 for (i = 0; i < nb_lcore_params; ++i) { 1179 queue = lcore_params[i].queue_id; 1180 if (queue >= MAX_RX_QUEUE_PER_PORT) { 1181 printf("invalid queue number: %hhu\n", queue); 1182 return -1; 1183 } 1184 lcore = lcore_params[i].lcore_id; 1185 if (!rte_lcore_is_enabled(lcore)) { 1186 printf("error: lcore %hhu is not enabled in lcore " 1187 "mask\n", lcore); 1188 return -1; 1189 } 1190 if ((socketid = rte_lcore_to_socket_id(lcore) != 0) && 1191 (numa_on == 0)) { 1192 printf("warning: lcore %hhu is on socket %d with numa " 1193 "off\n", lcore, socketid); 1194 } 1195 } 1196 return 0; 1197 } 1198 1199 static int 1200 check_port_config(void) 1201 { 1202 unsigned portid; 1203 uint16_t i; 1204 1205 for (i = 0; i < nb_lcore_params; ++i) { 1206 portid = lcore_params[i].port_id; 1207 if ((enabled_port_mask & (1 << portid)) == 0) { 1208 printf("port %u is not enabled in port mask\n", 1209 portid); 1210 return -1; 1211 } 1212 if (!rte_eth_dev_is_valid_port(portid)) { 1213 printf("port %u is not present on the board\n", 1214 portid); 1215 return -1; 1216 } 1217 } 1218 return 0; 1219 } 1220 1221 static uint8_t 1222 get_port_n_rx_queues(const uint16_t port) 1223 { 1224 int queue = -1; 1225 uint16_t i; 1226 1227 for (i = 0; i < nb_lcore_params; ++i) { 1228 if (lcore_params[i].port_id == port && 1229 lcore_params[i].queue_id > queue) 1230 queue = lcore_params[i].queue_id; 1231 } 1232 return (uint8_t)(++queue); 1233 } 1234 1235 static int 1236 init_lcore_rx_queues(void) 1237 { 1238 uint16_t i, nb_rx_queue; 1239 uint8_t lcore; 1240 1241 for (i = 0; i < nb_lcore_params; ++i) { 1242 lcore = lcore_params[i].lcore_id; 1243 nb_rx_queue = lcore_conf[lcore].n_rx_queue; 1244 if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { 1245 printf("error: too many queues (%u) for lcore: %u\n", 1246 (unsigned)nb_rx_queue + 1, (unsigned)lcore); 1247 return -1; 1248 } else { 1249 lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = 1250 lcore_params[i].port_id; 1251 lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = 1252 lcore_params[i].queue_id; 1253 lcore_conf[lcore].n_rx_queue++; 1254 } 1255 } 1256 return 0; 1257 } 1258 1259 /* display usage */ 1260 static void 1261 print_usage(const char *prgname) 1262 { 1263 printf ("%s [EAL options] -- -p PORTMASK -P" 1264 " [--config (port,queue,lcore)[,(port,queue,lcore]]" 1265 " [--high-perf-cores CORELIST" 1266 " [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index]]" 1267 " [--enable-jumbo [--max-pkt-len PKTLEN]]\n" 1268 " -p PORTMASK: hexadecimal bitmask of ports to configure\n" 1269 " -P : enable promiscuous mode\n" 1270 " --config (port,queue,lcore): rx queues configuration\n" 1271 " --high-perf-cores CORELIST: list of high performance cores\n" 1272 " --perf-config: similar as config, cores specified as indices" 1273 " for bins containing high or regular performance cores\n" 1274 " --no-numa: optional, disable numa awareness\n" 1275 " --enable-jumbo: enable jumbo frame" 1276 " which max packet len is PKTLEN in decimal (64-9600)\n" 1277 " --parse-ptype: parse packet type by software\n" 1278 " --empty-poll: enable empty poll detection" 1279 " follow (training_flag, high_threshold, med_threshold)\n", 1280 prgname); 1281 } 1282 1283 static int parse_max_pkt_len(const char *pktlen) 1284 { 1285 char *end = NULL; 1286 unsigned long len; 1287 1288 /* parse decimal string */ 1289 len = strtoul(pktlen, &end, 10); 1290 if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0')) 1291 return -1; 1292 1293 if (len == 0) 1294 return -1; 1295 1296 return len; 1297 } 1298 1299 static int 1300 parse_portmask(const char *portmask) 1301 { 1302 char *end = NULL; 1303 unsigned long pm; 1304 1305 /* parse hexadecimal string */ 1306 pm = strtoul(portmask, &end, 16); 1307 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 1308 return -1; 1309 1310 if (pm == 0) 1311 return -1; 1312 1313 return pm; 1314 } 1315 1316 static int 1317 parse_config(const char *q_arg) 1318 { 1319 char s[256]; 1320 const char *p, *p0 = q_arg; 1321 char *end; 1322 enum fieldnames { 1323 FLD_PORT = 0, 1324 FLD_QUEUE, 1325 FLD_LCORE, 1326 _NUM_FLD 1327 }; 1328 unsigned long int_fld[_NUM_FLD]; 1329 char *str_fld[_NUM_FLD]; 1330 int i; 1331 unsigned size; 1332 1333 nb_lcore_params = 0; 1334 1335 while ((p = strchr(p0,'(')) != NULL) { 1336 ++p; 1337 if((p0 = strchr(p,')')) == NULL) 1338 return -1; 1339 1340 size = p0 - p; 1341 if(size >= sizeof(s)) 1342 return -1; 1343 1344 snprintf(s, sizeof(s), "%.*s", size, p); 1345 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != 1346 _NUM_FLD) 1347 return -1; 1348 for (i = 0; i < _NUM_FLD; i++){ 1349 errno = 0; 1350 int_fld[i] = strtoul(str_fld[i], &end, 0); 1351 if (errno != 0 || end == str_fld[i] || int_fld[i] > 1352 255) 1353 return -1; 1354 } 1355 if (nb_lcore_params >= MAX_LCORE_PARAMS) { 1356 printf("exceeded max number of lcore params: %hu\n", 1357 nb_lcore_params); 1358 return -1; 1359 } 1360 lcore_params_array[nb_lcore_params].port_id = 1361 (uint8_t)int_fld[FLD_PORT]; 1362 lcore_params_array[nb_lcore_params].queue_id = 1363 (uint8_t)int_fld[FLD_QUEUE]; 1364 lcore_params_array[nb_lcore_params].lcore_id = 1365 (uint8_t)int_fld[FLD_LCORE]; 1366 ++nb_lcore_params; 1367 } 1368 lcore_params = lcore_params_array; 1369 1370 return 0; 1371 } 1372 static int 1373 parse_ep_config(const char *q_arg) 1374 { 1375 char s[256]; 1376 const char *p = q_arg; 1377 char *end; 1378 int num_arg; 1379 1380 char *str_fld[3]; 1381 1382 int training_flag; 1383 int med_edpi; 1384 int hgh_edpi; 1385 1386 ep_med_edpi = EMPTY_POLL_MED_THRESHOLD; 1387 ep_hgh_edpi = EMPTY_POLL_MED_THRESHOLD; 1388 1389 strlcpy(s, p, sizeof(s)); 1390 1391 num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ','); 1392 1393 empty_poll_train = false; 1394 1395 if (num_arg == 0) 1396 return 0; 1397 1398 if (num_arg == 3) { 1399 1400 training_flag = strtoul(str_fld[0], &end, 0); 1401 med_edpi = strtoul(str_fld[1], &end, 0); 1402 hgh_edpi = strtoul(str_fld[2], &end, 0); 1403 1404 if (training_flag == 1) 1405 empty_poll_train = true; 1406 1407 if (med_edpi > 0) 1408 ep_med_edpi = med_edpi; 1409 1410 if (med_edpi > 0) 1411 ep_hgh_edpi = hgh_edpi; 1412 1413 } else { 1414 1415 return -1; 1416 } 1417 1418 return 0; 1419 1420 } 1421 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" 1422 1423 /* Parse the argument given in the command line of the application */ 1424 static int 1425 parse_args(int argc, char **argv) 1426 { 1427 int opt, ret; 1428 char **argvopt; 1429 int option_index; 1430 uint32_t limit; 1431 char *prgname = argv[0]; 1432 static struct option lgopts[] = { 1433 {"config", 1, 0, 0}, 1434 {"perf-config", 1, 0, 0}, 1435 {"high-perf-cores", 1, 0, 0}, 1436 {"no-numa", 0, 0, 0}, 1437 {"enable-jumbo", 0, 0, 0}, 1438 {"empty-poll", 1, 0, 0}, 1439 {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, 1440 {NULL, 0, 0, 0} 1441 }; 1442 1443 argvopt = argv; 1444 1445 while ((opt = getopt_long(argc, argvopt, "p:l:m:h:P", 1446 lgopts, &option_index)) != EOF) { 1447 1448 switch (opt) { 1449 /* portmask */ 1450 case 'p': 1451 enabled_port_mask = parse_portmask(optarg); 1452 if (enabled_port_mask == 0) { 1453 printf("invalid portmask\n"); 1454 print_usage(prgname); 1455 return -1; 1456 } 1457 break; 1458 case 'P': 1459 printf("Promiscuous mode selected\n"); 1460 promiscuous_on = 1; 1461 break; 1462 case 'l': 1463 limit = parse_max_pkt_len(optarg); 1464 freq_tlb[LOW] = limit; 1465 break; 1466 case 'm': 1467 limit = parse_max_pkt_len(optarg); 1468 freq_tlb[MED] = limit; 1469 break; 1470 case 'h': 1471 limit = parse_max_pkt_len(optarg); 1472 freq_tlb[HGH] = limit; 1473 break; 1474 /* long options */ 1475 case 0: 1476 if (!strncmp(lgopts[option_index].name, "config", 6)) { 1477 ret = parse_config(optarg); 1478 if (ret) { 1479 printf("invalid config\n"); 1480 print_usage(prgname); 1481 return -1; 1482 } 1483 } 1484 1485 if (!strncmp(lgopts[option_index].name, 1486 "perf-config", 11)) { 1487 ret = parse_perf_config(optarg); 1488 if (ret) { 1489 printf("invalid perf-config\n"); 1490 print_usage(prgname); 1491 return -1; 1492 } 1493 } 1494 1495 if (!strncmp(lgopts[option_index].name, 1496 "high-perf-cores", 15)) { 1497 ret = parse_perf_core_list(optarg); 1498 if (ret) { 1499 printf("invalid high-perf-cores\n"); 1500 print_usage(prgname); 1501 return -1; 1502 } 1503 } 1504 1505 if (!strncmp(lgopts[option_index].name, 1506 "no-numa", 7)) { 1507 printf("numa is disabled \n"); 1508 numa_on = 0; 1509 } 1510 1511 if (!strncmp(lgopts[option_index].name, 1512 "empty-poll", 10)) { 1513 printf("empty-poll is enabled\n"); 1514 empty_poll_on = true; 1515 ret = parse_ep_config(optarg); 1516 1517 if (ret) { 1518 printf("invalid empty poll config\n"); 1519 print_usage(prgname); 1520 return -1; 1521 } 1522 1523 } 1524 1525 if (!strncmp(lgopts[option_index].name, 1526 "enable-jumbo", 12)) { 1527 struct option lenopts = 1528 {"max-pkt-len", required_argument, \ 1529 0, 0}; 1530 1531 printf("jumbo frame is enabled \n"); 1532 port_conf.rxmode.offloads |= 1533 DEV_RX_OFFLOAD_JUMBO_FRAME; 1534 port_conf.txmode.offloads |= 1535 DEV_TX_OFFLOAD_MULTI_SEGS; 1536 1537 /** 1538 * if no max-pkt-len set, use the default value 1539 * RTE_ETHER_MAX_LEN 1540 */ 1541 if (0 == getopt_long(argc, argvopt, "", 1542 &lenopts, &option_index)) { 1543 ret = parse_max_pkt_len(optarg); 1544 if ((ret < 64) || 1545 (ret > MAX_JUMBO_PKT_LEN)){ 1546 printf("invalid packet " 1547 "length\n"); 1548 print_usage(prgname); 1549 return -1; 1550 } 1551 port_conf.rxmode.max_rx_pkt_len = ret; 1552 } 1553 printf("set jumbo frame " 1554 "max packet length to %u\n", 1555 (unsigned int)port_conf.rxmode.max_rx_pkt_len); 1556 } 1557 1558 if (!strncmp(lgopts[option_index].name, 1559 CMD_LINE_OPT_PARSE_PTYPE, 1560 sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { 1561 printf("soft parse-ptype is enabled\n"); 1562 parse_ptype = 1; 1563 } 1564 1565 break; 1566 1567 default: 1568 print_usage(prgname); 1569 return -1; 1570 } 1571 } 1572 1573 if (optind >= 0) 1574 argv[optind-1] = prgname; 1575 1576 ret = optind-1; 1577 optind = 1; /* reset getopt lib */ 1578 return ret; 1579 } 1580 1581 static void 1582 print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr) 1583 { 1584 char buf[RTE_ETHER_ADDR_FMT_SIZE]; 1585 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr); 1586 printf("%s%s", name, buf); 1587 } 1588 1589 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1590 static void 1591 setup_hash(int socketid) 1592 { 1593 struct rte_hash_parameters ipv4_l3fwd_hash_params = { 1594 .name = NULL, 1595 .entries = L3FWD_HASH_ENTRIES, 1596 .key_len = sizeof(struct ipv4_5tuple), 1597 .hash_func = DEFAULT_HASH_FUNC, 1598 .hash_func_init_val = 0, 1599 }; 1600 1601 struct rte_hash_parameters ipv6_l3fwd_hash_params = { 1602 .name = NULL, 1603 .entries = L3FWD_HASH_ENTRIES, 1604 .key_len = sizeof(struct ipv6_5tuple), 1605 .hash_func = DEFAULT_HASH_FUNC, 1606 .hash_func_init_val = 0, 1607 }; 1608 1609 unsigned i; 1610 int ret; 1611 char s[64]; 1612 1613 /* create ipv4 hash */ 1614 snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); 1615 ipv4_l3fwd_hash_params.name = s; 1616 ipv4_l3fwd_hash_params.socket_id = socketid; 1617 ipv4_l3fwd_lookup_struct[socketid] = 1618 rte_hash_create(&ipv4_l3fwd_hash_params); 1619 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 1620 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 1621 "socket %d\n", socketid); 1622 1623 /* create ipv6 hash */ 1624 snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); 1625 ipv6_l3fwd_hash_params.name = s; 1626 ipv6_l3fwd_hash_params.socket_id = socketid; 1627 ipv6_l3fwd_lookup_struct[socketid] = 1628 rte_hash_create(&ipv6_l3fwd_hash_params); 1629 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 1630 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 1631 "socket %d\n", socketid); 1632 1633 1634 /* populate the ipv4 hash */ 1635 for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { 1636 ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid], 1637 (void *) &ipv4_l3fwd_route_array[i].key); 1638 if (ret < 0) { 1639 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 1640 "l3fwd hash on socket %d\n", i, socketid); 1641 } 1642 ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out; 1643 printf("Hash: Adding key\n"); 1644 print_ipv4_key(ipv4_l3fwd_route_array[i].key); 1645 } 1646 1647 /* populate the ipv6 hash */ 1648 for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) { 1649 ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid], 1650 (void *) &ipv6_l3fwd_route_array[i].key); 1651 if (ret < 0) { 1652 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 1653 "l3fwd hash on socket %d\n", i, socketid); 1654 } 1655 ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out; 1656 printf("Hash: Adding key\n"); 1657 print_ipv6_key(ipv6_l3fwd_route_array[i].key); 1658 } 1659 } 1660 #endif 1661 1662 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 1663 static void 1664 setup_lpm(int socketid) 1665 { 1666 unsigned i; 1667 int ret; 1668 char s[64]; 1669 1670 /* create the LPM table */ 1671 struct rte_lpm_config lpm_ipv4_config; 1672 1673 lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; 1674 lpm_ipv4_config.number_tbl8s = 256; 1675 lpm_ipv4_config.flags = 0; 1676 1677 snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); 1678 ipv4_l3fwd_lookup_struct[socketid] = 1679 rte_lpm_create(s, socketid, &lpm_ipv4_config); 1680 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 1681 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 1682 " on socket %d\n", socketid); 1683 1684 /* populate the LPM table */ 1685 for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { 1686 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], 1687 ipv4_l3fwd_route_array[i].ip, 1688 ipv4_l3fwd_route_array[i].depth, 1689 ipv4_l3fwd_route_array[i].if_out); 1690 1691 if (ret < 0) { 1692 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 1693 "l3fwd LPM table on socket %d\n", 1694 i, socketid); 1695 } 1696 1697 printf("LPM: Adding route 0x%08x / %d (%d)\n", 1698 (unsigned)ipv4_l3fwd_route_array[i].ip, 1699 ipv4_l3fwd_route_array[i].depth, 1700 ipv4_l3fwd_route_array[i].if_out); 1701 } 1702 } 1703 #endif 1704 1705 static int 1706 init_mem(unsigned nb_mbuf) 1707 { 1708 struct lcore_conf *qconf; 1709 int socketid; 1710 unsigned lcore_id; 1711 char s[64]; 1712 1713 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 1714 if (rte_lcore_is_enabled(lcore_id) == 0) 1715 continue; 1716 1717 if (numa_on) 1718 socketid = rte_lcore_to_socket_id(lcore_id); 1719 else 1720 socketid = 0; 1721 1722 if (socketid >= NB_SOCKETS) { 1723 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is " 1724 "out of range %d\n", socketid, 1725 lcore_id, NB_SOCKETS); 1726 } 1727 if (pktmbuf_pool[socketid] == NULL) { 1728 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 1729 pktmbuf_pool[socketid] = 1730 rte_pktmbuf_pool_create(s, nb_mbuf, 1731 MEMPOOL_CACHE_SIZE, 0, 1732 RTE_MBUF_DEFAULT_BUF_SIZE, 1733 socketid); 1734 if (pktmbuf_pool[socketid] == NULL) 1735 rte_exit(EXIT_FAILURE, 1736 "Cannot init mbuf pool on socket %d\n", 1737 socketid); 1738 else 1739 printf("Allocated mbuf pool on socket %d\n", 1740 socketid); 1741 1742 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 1743 setup_lpm(socketid); 1744 #else 1745 setup_hash(socketid); 1746 #endif 1747 } 1748 qconf = &lcore_conf[lcore_id]; 1749 qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid]; 1750 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1751 qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid]; 1752 #endif 1753 } 1754 return 0; 1755 } 1756 1757 /* Check the link status of all ports in up to 9s, and print them finally */ 1758 static void 1759 check_all_ports_link_status(uint32_t port_mask) 1760 { 1761 #define CHECK_INTERVAL 100 /* 100ms */ 1762 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 1763 uint8_t count, all_ports_up, print_flag = 0; 1764 uint16_t portid; 1765 struct rte_eth_link link; 1766 1767 printf("\nChecking link status"); 1768 fflush(stdout); 1769 for (count = 0; count <= MAX_CHECK_TIME; count++) { 1770 all_ports_up = 1; 1771 RTE_ETH_FOREACH_DEV(portid) { 1772 if ((port_mask & (1 << portid)) == 0) 1773 continue; 1774 memset(&link, 0, sizeof(link)); 1775 rte_eth_link_get_nowait(portid, &link); 1776 /* print link status if flag set */ 1777 if (print_flag == 1) { 1778 if (link.link_status) 1779 printf("Port %d Link Up - speed %u " 1780 "Mbps - %s\n", (uint8_t)portid, 1781 (unsigned)link.link_speed, 1782 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 1783 ("full-duplex") : ("half-duplex\n")); 1784 else 1785 printf("Port %d Link Down\n", 1786 (uint8_t)portid); 1787 continue; 1788 } 1789 /* clear all_ports_up flag if any link down */ 1790 if (link.link_status == ETH_LINK_DOWN) { 1791 all_ports_up = 0; 1792 break; 1793 } 1794 } 1795 /* after finally printing all link status, get out */ 1796 if (print_flag == 1) 1797 break; 1798 1799 if (all_ports_up == 0) { 1800 printf("."); 1801 fflush(stdout); 1802 rte_delay_ms(CHECK_INTERVAL); 1803 } 1804 1805 /* set the print_flag if all ports up or timeout */ 1806 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 1807 print_flag = 1; 1808 printf("done\n"); 1809 } 1810 } 1811 } 1812 1813 static int check_ptype(uint16_t portid) 1814 { 1815 int i, ret; 1816 int ptype_l3_ipv4 = 0; 1817 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1818 int ptype_l3_ipv6 = 0; 1819 #endif 1820 uint32_t ptype_mask = RTE_PTYPE_L3_MASK; 1821 1822 ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0); 1823 if (ret <= 0) 1824 return 0; 1825 1826 uint32_t ptypes[ret]; 1827 1828 ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret); 1829 for (i = 0; i < ret; ++i) { 1830 if (ptypes[i] & RTE_PTYPE_L3_IPV4) 1831 ptype_l3_ipv4 = 1; 1832 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1833 if (ptypes[i] & RTE_PTYPE_L3_IPV6) 1834 ptype_l3_ipv6 = 1; 1835 #endif 1836 } 1837 1838 if (ptype_l3_ipv4 == 0) 1839 printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid); 1840 1841 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1842 if (ptype_l3_ipv6 == 0) 1843 printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid); 1844 #endif 1845 1846 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 1847 if (ptype_l3_ipv4) 1848 #else /* APP_LOOKUP_EXACT_MATCH */ 1849 if (ptype_l3_ipv4 && ptype_l3_ipv6) 1850 #endif 1851 return 1; 1852 1853 return 0; 1854 1855 } 1856 1857 static int 1858 init_power_library(void) 1859 { 1860 int ret = 0, lcore_id; 1861 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 1862 if (rte_lcore_is_enabled(lcore_id)) { 1863 /* init power management library */ 1864 ret = rte_power_init(lcore_id); 1865 if (ret) 1866 RTE_LOG(ERR, POWER, 1867 "Library initialization failed on core %u\n", 1868 lcore_id); 1869 } 1870 } 1871 return ret; 1872 } 1873 static void 1874 empty_poll_setup_timer(void) 1875 { 1876 int lcore_id = rte_lcore_id(); 1877 uint64_t hz = rte_get_timer_hz(); 1878 1879 struct ep_params *ep_ptr = ep_params; 1880 1881 ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND; 1882 1883 rte_timer_reset_sync(&ep_ptr->timer0, 1884 ep_ptr->interval_ticks, 1885 PERIODICAL, 1886 lcore_id, 1887 rte_empty_poll_detection, 1888 (void *)ep_ptr); 1889 1890 } 1891 static int 1892 launch_timer(unsigned int lcore_id) 1893 { 1894 int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms; 1895 1896 RTE_SET_USED(lcore_id); 1897 1898 1899 if (rte_get_master_lcore() != lcore_id) { 1900 rte_panic("timer on lcore:%d which is not master core:%d\n", 1901 lcore_id, 1902 rte_get_master_lcore()); 1903 } 1904 1905 RTE_LOG(INFO, POWER, "Bring up the Timer\n"); 1906 1907 empty_poll_setup_timer(); 1908 1909 cycles_10ms = rte_get_timer_hz() / 100; 1910 1911 while (!is_done()) { 1912 cur_tsc = rte_rdtsc(); 1913 diff_tsc = cur_tsc - prev_tsc; 1914 if (diff_tsc > cycles_10ms) { 1915 rte_timer_manage(); 1916 prev_tsc = cur_tsc; 1917 cycles_10ms = rte_get_timer_hz() / 100; 1918 } 1919 } 1920 1921 RTE_LOG(INFO, POWER, "Timer_subsystem is done\n"); 1922 1923 return 0; 1924 } 1925 1926 1927 int 1928 main(int argc, char **argv) 1929 { 1930 struct lcore_conf *qconf; 1931 struct rte_eth_dev_info dev_info; 1932 struct rte_eth_txconf *txconf; 1933 int ret; 1934 uint16_t nb_ports; 1935 uint16_t queueid; 1936 unsigned lcore_id; 1937 uint64_t hz; 1938 uint32_t n_tx_queue, nb_lcores; 1939 uint32_t dev_rxq_num, dev_txq_num; 1940 uint8_t nb_rx_queue, queue, socketid; 1941 uint16_t portid; 1942 1943 /* catch SIGINT and restore cpufreq governor to ondemand */ 1944 signal(SIGINT, signal_exit_now); 1945 1946 /* init EAL */ 1947 ret = rte_eal_init(argc, argv); 1948 if (ret < 0) 1949 rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); 1950 argc -= ret; 1951 argv += ret; 1952 1953 /* init RTE timer library to be used late */ 1954 rte_timer_subsystem_init(); 1955 1956 /* parse application arguments (after the EAL ones) */ 1957 ret = parse_args(argc, argv); 1958 if (ret < 0) 1959 rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); 1960 1961 if (init_power_library()) 1962 RTE_LOG(ERR, L3FWD_POWER, "init_power_library failed\n"); 1963 1964 if (update_lcore_params() < 0) 1965 rte_exit(EXIT_FAILURE, "update_lcore_params failed\n"); 1966 1967 if (check_lcore_params() < 0) 1968 rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); 1969 1970 ret = init_lcore_rx_queues(); 1971 if (ret < 0) 1972 rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n"); 1973 1974 nb_ports = rte_eth_dev_count_avail(); 1975 1976 if (check_port_config() < 0) 1977 rte_exit(EXIT_FAILURE, "check_port_config failed\n"); 1978 1979 nb_lcores = rte_lcore_count(); 1980 1981 /* initialize all ports */ 1982 RTE_ETH_FOREACH_DEV(portid) { 1983 struct rte_eth_conf local_port_conf = port_conf; 1984 1985 /* skip ports that are not enabled */ 1986 if ((enabled_port_mask & (1 << portid)) == 0) { 1987 printf("\nSkipping disabled port %d\n", portid); 1988 continue; 1989 } 1990 1991 /* init port */ 1992 printf("Initializing port %d ... ", portid ); 1993 fflush(stdout); 1994 1995 rte_eth_dev_info_get(portid, &dev_info); 1996 dev_rxq_num = dev_info.max_rx_queues; 1997 dev_txq_num = dev_info.max_tx_queues; 1998 1999 nb_rx_queue = get_port_n_rx_queues(portid); 2000 if (nb_rx_queue > dev_rxq_num) 2001 rte_exit(EXIT_FAILURE, 2002 "Cannot configure not existed rxq: " 2003 "port=%d\n", portid); 2004 2005 n_tx_queue = nb_lcores; 2006 if (n_tx_queue > dev_txq_num) 2007 n_tx_queue = dev_txq_num; 2008 printf("Creating queues: nb_rxq=%d nb_txq=%u... ", 2009 nb_rx_queue, (unsigned)n_tx_queue ); 2010 /* If number of Rx queue is 0, no need to enable Rx interrupt */ 2011 if (nb_rx_queue == 0) 2012 local_port_conf.intr_conf.rxq = 0; 2013 rte_eth_dev_info_get(portid, &dev_info); 2014 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 2015 local_port_conf.txmode.offloads |= 2016 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 2017 2018 local_port_conf.rx_adv_conf.rss_conf.rss_hf &= 2019 dev_info.flow_type_rss_offloads; 2020 if (local_port_conf.rx_adv_conf.rss_conf.rss_hf != 2021 port_conf.rx_adv_conf.rss_conf.rss_hf) { 2022 printf("Port %u modified RSS hash function based on hardware support," 2023 "requested:%#"PRIx64" configured:%#"PRIx64"\n", 2024 portid, 2025 port_conf.rx_adv_conf.rss_conf.rss_hf, 2026 local_port_conf.rx_adv_conf.rss_conf.rss_hf); 2027 } 2028 2029 ret = rte_eth_dev_configure(portid, nb_rx_queue, 2030 (uint16_t)n_tx_queue, &local_port_conf); 2031 if (ret < 0) 2032 rte_exit(EXIT_FAILURE, "Cannot configure device: " 2033 "err=%d, port=%d\n", ret, portid); 2034 2035 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, 2036 &nb_txd); 2037 if (ret < 0) 2038 rte_exit(EXIT_FAILURE, 2039 "Cannot adjust number of descriptors: err=%d, port=%d\n", 2040 ret, portid); 2041 2042 rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 2043 print_ethaddr(" Address:", &ports_eth_addr[portid]); 2044 printf(", "); 2045 2046 /* init memory */ 2047 ret = init_mem(NB_MBUF); 2048 if (ret < 0) 2049 rte_exit(EXIT_FAILURE, "init_mem failed\n"); 2050 2051 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2052 if (rte_lcore_is_enabled(lcore_id) == 0) 2053 continue; 2054 2055 /* Initialize TX buffers */ 2056 qconf = &lcore_conf[lcore_id]; 2057 qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", 2058 RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, 2059 rte_eth_dev_socket_id(portid)); 2060 if (qconf->tx_buffer[portid] == NULL) 2061 rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n", 2062 portid); 2063 2064 rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST); 2065 } 2066 2067 /* init one TX queue per couple (lcore,port) */ 2068 queueid = 0; 2069 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2070 if (rte_lcore_is_enabled(lcore_id) == 0) 2071 continue; 2072 2073 if (queueid >= dev_txq_num) 2074 continue; 2075 2076 if (numa_on) 2077 socketid = \ 2078 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2079 else 2080 socketid = 0; 2081 2082 printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); 2083 fflush(stdout); 2084 2085 txconf = &dev_info.default_txconf; 2086 txconf->offloads = local_port_conf.txmode.offloads; 2087 ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, 2088 socketid, txconf); 2089 if (ret < 0) 2090 rte_exit(EXIT_FAILURE, 2091 "rte_eth_tx_queue_setup: err=%d, " 2092 "port=%d\n", ret, portid); 2093 2094 qconf = &lcore_conf[lcore_id]; 2095 qconf->tx_queue_id[portid] = queueid; 2096 queueid++; 2097 2098 qconf->tx_port_id[qconf->n_tx_port] = portid; 2099 qconf->n_tx_port++; 2100 } 2101 printf("\n"); 2102 } 2103 2104 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2105 if (rte_lcore_is_enabled(lcore_id) == 0) 2106 continue; 2107 2108 if (empty_poll_on == false) { 2109 /* init timer structures for each enabled lcore */ 2110 rte_timer_init(&power_timers[lcore_id]); 2111 hz = rte_get_timer_hz(); 2112 rte_timer_reset(&power_timers[lcore_id], 2113 hz/TIMER_NUMBER_PER_SECOND, 2114 SINGLE, lcore_id, 2115 power_timer_cb, NULL); 2116 } 2117 qconf = &lcore_conf[lcore_id]; 2118 printf("\nInitializing rx queues on lcore %u ... ", lcore_id ); 2119 fflush(stdout); 2120 /* init RX queues */ 2121 for(queue = 0; queue < qconf->n_rx_queue; ++queue) { 2122 struct rte_eth_rxconf rxq_conf; 2123 struct rte_eth_dev *dev; 2124 struct rte_eth_conf *conf; 2125 2126 portid = qconf->rx_queue_list[queue].port_id; 2127 queueid = qconf->rx_queue_list[queue].queue_id; 2128 dev = &rte_eth_devices[portid]; 2129 conf = &dev->data->dev_conf; 2130 2131 if (numa_on) 2132 socketid = \ 2133 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2134 else 2135 socketid = 0; 2136 2137 printf("rxq=%d,%d,%d ", portid, queueid, socketid); 2138 fflush(stdout); 2139 2140 rte_eth_dev_info_get(portid, &dev_info); 2141 rxq_conf = dev_info.default_rxconf; 2142 rxq_conf.offloads = conf->rxmode.offloads; 2143 ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, 2144 socketid, &rxq_conf, 2145 pktmbuf_pool[socketid]); 2146 if (ret < 0) 2147 rte_exit(EXIT_FAILURE, 2148 "rte_eth_rx_queue_setup: err=%d, " 2149 "port=%d\n", ret, portid); 2150 2151 if (parse_ptype) { 2152 if (add_cb_parse_ptype(portid, queueid) < 0) 2153 rte_exit(EXIT_FAILURE, 2154 "Fail to add ptype cb\n"); 2155 } else if (!check_ptype(portid)) 2156 rte_exit(EXIT_FAILURE, 2157 "PMD can not provide needed ptypes\n"); 2158 } 2159 } 2160 2161 printf("\n"); 2162 2163 /* start ports */ 2164 RTE_ETH_FOREACH_DEV(portid) { 2165 if ((enabled_port_mask & (1 << portid)) == 0) { 2166 continue; 2167 } 2168 /* Start device */ 2169 ret = rte_eth_dev_start(portid); 2170 if (ret < 0) 2171 rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, " 2172 "port=%d\n", ret, portid); 2173 /* 2174 * If enabled, put device in promiscuous mode. 2175 * This allows IO forwarding mode to forward packets 2176 * to itself through 2 cross-connected ports of the 2177 * target machine. 2178 */ 2179 if (promiscuous_on) 2180 rte_eth_promiscuous_enable(portid); 2181 /* initialize spinlock for each port */ 2182 rte_spinlock_init(&(locks[portid])); 2183 } 2184 2185 check_all_ports_link_status(enabled_port_mask); 2186 2187 if (empty_poll_on == true) { 2188 2189 if (empty_poll_train) { 2190 policy.state = TRAINING; 2191 } else { 2192 policy.state = MED_NORMAL; 2193 policy.med_base_edpi = ep_med_edpi; 2194 policy.hgh_base_edpi = ep_hgh_edpi; 2195 } 2196 2197 ret = rte_power_empty_poll_stat_init(&ep_params, 2198 freq_tlb, 2199 &policy); 2200 if (ret < 0) 2201 rte_exit(EXIT_FAILURE, "empty poll init failed"); 2202 } 2203 2204 2205 /* launch per-lcore init on every lcore */ 2206 if (empty_poll_on == false) { 2207 rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); 2208 } else { 2209 empty_poll_stop = false; 2210 rte_eal_mp_remote_launch(main_empty_poll_loop, NULL, 2211 SKIP_MASTER); 2212 } 2213 2214 if (empty_poll_on == true) 2215 launch_timer(rte_lcore_id()); 2216 2217 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 2218 if (rte_eal_wait_lcore(lcore_id) < 0) 2219 return -1; 2220 } 2221 2222 if (empty_poll_on) 2223 rte_power_empty_poll_stat_free(); 2224 2225 return 0; 2226 } 2227