1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdint.h> 8 #include <inttypes.h> 9 #include <sys/types.h> 10 #include <string.h> 11 #include <sys/queue.h> 12 #include <stdarg.h> 13 #include <errno.h> 14 #include <getopt.h> 15 #include <unistd.h> 16 #include <signal.h> 17 #include <math.h> 18 19 #include <rte_common.h> 20 #include <rte_byteorder.h> 21 #include <rte_log.h> 22 #include <rte_malloc.h> 23 #include <rte_memory.h> 24 #include <rte_memcpy.h> 25 #include <rte_eal.h> 26 #include <rte_launch.h> 27 #include <rte_cycles.h> 28 #include <rte_prefetch.h> 29 #include <rte_lcore.h> 30 #include <rte_per_lcore.h> 31 #include <rte_branch_prediction.h> 32 #include <rte_interrupts.h> 33 #include <rte_random.h> 34 #include <rte_debug.h> 35 #include <rte_ether.h> 36 #include <rte_ethdev.h> 37 #include <rte_mempool.h> 38 #include <rte_mbuf.h> 39 #include <rte_ip.h> 40 #include <rte_tcp.h> 41 #include <rte_udp.h> 42 #include <rte_string_fns.h> 43 #include <rte_timer.h> 44 #include <rte_power.h> 45 #include <rte_spinlock.h> 46 #include <rte_power_empty_poll.h> 47 #include <rte_metrics.h> 48 #include <rte_telemetry.h> 49 #include <rte_power_pmd_mgmt.h> 50 51 #include "perf_core.h" 52 #include "main.h" 53 54 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 55 56 #define MAX_PKT_BURST 32 57 58 #define MIN_ZERO_POLL_COUNT 10 59 60 /* 100 ms interval */ 61 #define TIMER_NUMBER_PER_SECOND 10 62 /* (10ms) */ 63 #define INTERVALS_PER_SECOND 100 64 /* 100000 us */ 65 #define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND) 66 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25 67 68 #define APP_LOOKUP_EXACT_MATCH 0 69 #define APP_LOOKUP_LPM 1 70 #define DO_RFC_1812_CHECKS 71 72 #ifndef APP_LOOKUP_METHOD 73 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 74 #endif 75 76 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 77 #include <rte_hash.h> 78 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 79 #include <rte_lpm.h> 80 #else 81 #error "APP_LOOKUP_METHOD set to incorrect value" 82 #endif 83 84 #ifndef IPv6_BYTES 85 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ 86 "%02x%02x:%02x%02x:%02x%02x:%02x%02x" 87 #define IPv6_BYTES(addr) \ 88 addr[0], addr[1], addr[2], addr[3], \ 89 addr[4], addr[5], addr[6], addr[7], \ 90 addr[8], addr[9], addr[10], addr[11],\ 91 addr[12], addr[13],addr[14], addr[15] 92 #endif 93 94 #define MAX_JUMBO_PKT_LEN 9600 95 96 #define IPV6_ADDR_LEN 16 97 98 #define MEMPOOL_CACHE_SIZE 256 99 100 /* 101 * This expression is used to calculate the number of mbufs needed depending on 102 * user input, taking into account memory for rx and tx hardware rings, cache 103 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 104 * NB_MBUF never goes below a minimum value of 8192. 105 */ 106 107 #define NB_MBUF RTE_MAX ( \ 108 (nb_ports*nb_rx_queue*nb_rxd + \ 109 nb_ports*nb_lcores*MAX_PKT_BURST + \ 110 nb_ports*n_tx_queue*nb_txd + \ 111 nb_lcores*MEMPOOL_CACHE_SIZE), \ 112 (unsigned)8192) 113 114 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 115 116 #define NB_SOCKETS 8 117 118 /* Configure how many packets ahead to prefetch, when reading packets */ 119 #define PREFETCH_OFFSET 3 120 121 /* 122 * Configurable number of RX/TX ring descriptors 123 */ 124 #define RTE_TEST_RX_DESC_DEFAULT 1024 125 #define RTE_TEST_TX_DESC_DEFAULT 1024 126 127 /* 128 * These two thresholds were decided on by running the training algorithm on 129 * a 2.5GHz Xeon. 
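 * (These values seed ep_med_edpi and ep_hgh_edpi below and are only used
 * when the application runs in empty-poll mode.)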
 * These defaults can be overridden by supplying non-zero values
 * for the med_threshold and high_threshold parameters on the command line.
 */
#define EMPTY_POLL_MED_THRESHOLD 350000UL
#define EMPTY_POLL_HGH_THRESHOLD 580000UL

#define NUM_TELSTATS RTE_DIM(telstats_strings)

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

/* ethernet addresses of ports */
static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

/* per-port spinlocks, used to serialize Rx interrupt enable/disable calls */
static rte_spinlock_t locks[RTE_MAX_ETHPORTS];

/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;
/* Ports set in promiscuous mode off by default. */
static int promiscuous_on = 0;
/* NUMA is enabled by default. */
static int numa_on = 1;
static bool empty_poll_stop;
static bool empty_poll_train;
volatile bool quit_signal;
static struct ep_params *ep_params;
static struct ep_policy policy;
static long ep_med_edpi, ep_hgh_edpi;
/* timer to update telemetry every 500ms */
static struct rte_timer telemetry_timer;

/* stats index returned by metrics lib */
int telstats_index;

struct telstats_name {
	char name[RTE_ETH_XSTATS_NAME_SIZE];
};

/* telemetry stats to be reported */
const struct telstats_name telstats_strings[] = {
	{"empty_poll"},
	{"full_poll"},
	{"busy_percent"}
};

/* core busyness in percentage */
enum busy_rate {
	ZERO = 0,
	PARTIAL = 50,
	FULL = 100
};

/* reference poll count to measure core busyness */
#define DEFAULT_COUNT 10000
/*
 * reference CYCLES to be used to
 * measure core busyness based on poll count
 */
#define MIN_CYCLES 1500000ULL
#define MAX_CYCLES 22000000ULL

/* (500ms) */
#define TELEMETRY_INTERVALS_PER_SEC 2

static int parse_ptype; /**< Parse packet type via Rx callback; disabled by default. */

enum appmode {
	APP_MODE_DEFAULT = 0,
	APP_MODE_LEGACY,
	APP_MODE_EMPTY_POLL,
	APP_MODE_TELEMETRY,
	APP_MODE_INTERRUPT,
	APP_MODE_PMD_MGMT
};

enum appmode app_mode;

static enum rte_power_pmd_mgmt_type pmgmt_type;
bool baseline_enabled;

enum freq_scale_hint_t
{
	FREQ_LOWER = -1,
	FREQ_CURRENT = 0,
	FREQ_HIGHER = 1,
	FREQ_HIGHEST = 2
};

struct lcore_rx_queue {
	uint16_t port_id;
	uint8_t queue_id;
	enum freq_scale_hint_t freq_up_hint;
	uint32_t zero_rx_packet_count;
	uint32_t idle_hint;
} __rte_cache_aligned;

#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
#define MAX_RX_QUEUE_PER_PORT 128

#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16

struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
static struct lcore_params lcore_params_array_default[] = {
	{0, 0, 2},
	{0, 1, 2},
	{0, 2, 2},
	{1, 0, 2},
	{1, 1, 2},
	{1, 2, 2},
	{2, 0, 2},
	{3, 0, 3},
	{3, 1, 3},
};

struct lcore_params *lcore_params = lcore_params_array_default;
uint16_t nb_lcore_params = RTE_DIM(lcore_params_array_default);

static struct rte_eth_conf port_conf = {
	.rxmode = {
		.mq_mode = RTE_ETH_MQ_RX_RSS,
		.split_hdr_size = 0,
		.offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM,
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_key = NULL,
			.rss_hf = RTE_ETH_RSS_UDP,
		},
	},
	.txmode = {
		.mq_mode
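		/* no TX multi-queue scheme (DCB/VMDq) is used on the transmit side */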
= RTE_ETH_MQ_TX_NONE, 264 } 265 }; 266 267 static uint32_t max_pkt_len; 268 269 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; 270 271 272 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 273 274 #ifdef RTE_ARCH_X86 275 #include <rte_hash_crc.h> 276 #define DEFAULT_HASH_FUNC rte_hash_crc 277 #else 278 #include <rte_jhash.h> 279 #define DEFAULT_HASH_FUNC rte_jhash 280 #endif 281 282 struct ipv4_5tuple { 283 uint32_t ip_dst; 284 uint32_t ip_src; 285 uint16_t port_dst; 286 uint16_t port_src; 287 uint8_t proto; 288 } __rte_packed; 289 290 struct ipv6_5tuple { 291 uint8_t ip_dst[IPV6_ADDR_LEN]; 292 uint8_t ip_src[IPV6_ADDR_LEN]; 293 uint16_t port_dst; 294 uint16_t port_src; 295 uint8_t proto; 296 } __rte_packed; 297 298 struct ipv4_l3fwd_route { 299 struct ipv4_5tuple key; 300 uint8_t if_out; 301 }; 302 303 struct ipv6_l3fwd_route { 304 struct ipv6_5tuple key; 305 uint8_t if_out; 306 }; 307 308 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 309 {{RTE_IPV4(100,10,0,1), RTE_IPV4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, 310 {{RTE_IPV4(100,20,0,2), RTE_IPV4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, 311 {{RTE_IPV4(100,30,0,3), RTE_IPV4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, 312 {{RTE_IPV4(100,40,0,4), RTE_IPV4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, 313 }; 314 315 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 316 { 317 { 318 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 319 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, 320 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 321 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, 322 1, 10, IPPROTO_UDP 323 }, 4 324 }, 325 }; 326 327 typedef struct rte_hash lookup_struct_t; 328 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 329 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 330 331 #define L3FWD_HASH_ENTRIES 1024 332 333 static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 334 static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 335 #endif 336 337 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 338 struct ipv4_l3fwd_route { 339 uint32_t ip; 340 uint8_t depth; 341 uint8_t if_out; 342 }; 343 344 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 345 {RTE_IPV4(1,1,1,0), 24, 0}, 346 {RTE_IPV4(2,1,1,0), 24, 1}, 347 {RTE_IPV4(3,1,1,0), 24, 2}, 348 {RTE_IPV4(4,1,1,0), 24, 3}, 349 {RTE_IPV4(5,1,1,0), 24, 4}, 350 {RTE_IPV4(6,1,1,0), 24, 5}, 351 {RTE_IPV4(7,1,1,0), 24, 6}, 352 {RTE_IPV4(8,1,1,0), 24, 7}, 353 }; 354 355 #define IPV4_L3FWD_LPM_MAX_RULES 1024 356 357 typedef struct rte_lpm lookup_struct_t; 358 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 359 #endif 360 361 struct lcore_conf { 362 uint16_t n_rx_queue; 363 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 364 uint16_t n_tx_port; 365 uint16_t tx_port_id[RTE_MAX_ETHPORTS]; 366 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 367 struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; 368 lookup_struct_t * ipv4_lookup_struct; 369 lookup_struct_t * ipv6_lookup_struct; 370 } __rte_cache_aligned; 371 372 struct lcore_stats { 373 /* total sleep time in ms since last frequency scaling down */ 374 uint32_t sleep_time; 375 /* number of long sleep recently */ 376 uint32_t nb_long_sleep; 377 /* freq. 
scaling up trend */
	uint32_t trend;
	/* total packet processed recently */
	uint64_t nb_rx_processed;
	/* total iterations looped recently */
	uint64_t nb_iteration_looped;
	/*
	 * Represents empty and non empty polls
	 * of rte_eth_rx_burst();
	 * ep_nep[0] holds non empty polls
	 * i.e. 0 < nb_rx <= MAX_PKT_BURST
	 * ep_nep[1] holds empty polls,
	 * i.e. nb_rx == 0
	 */
	uint64_t ep_nep[2];
	/*
	 * Represents full and empty+partial
	 * polls of rte_eth_rx_burst();
	 * fp_nfp[0] holds empty+partial polls,
	 * i.e. 0 <= nb_rx < MAX_PKT_BURST
	 * fp_nfp[1] holds full polls,
	 * i.e. nb_rx == MAX_PKT_BURST
	 */
	uint64_t fp_nfp[2];
	enum busy_rate br;
	rte_spinlock_t telemetry_lock;
} __rte_cache_aligned;

static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned;
static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned;
static struct rte_timer power_timers[RTE_MAX_LCORE];

static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \
		unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);

/*
 * These defaults are using the max frequency index (1), a medium index (9)
 * and a typical low frequency index (14). These can be adjusted to use
 * different indexes using the relevant command line parameters.
 */
static uint8_t freq_tlb[] = {14, 9, 1};

static int is_done(void)
{
	return quit_signal;
}

/* exit signal handler */
static void
signal_exit_now(int sigtype)
{
	if (sigtype == SIGINT)
		quit_signal = true;
}

/* Frequency scale down timer callback */
static void
power_timer_cb(__rte_unused struct rte_timer *tim,
		__rte_unused void *arg)
{
	uint64_t hz;
	float sleep_time_ratio;
	unsigned lcore_id = rte_lcore_id();

	/* ratio of sleep time to the scaling period since the last callback */
	sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
			(float)SCALING_PERIOD;
	/**
	 * scale down the frequency one step if the lcore slept for a large
	 * fraction of the period.
	 */
	if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
		if (rte_power_freq_down)
			rte_power_freq_down(lcore_id);
	}
	else if ( (unsigned)(stats[lcore_id].nb_rx_processed /
		stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
		/**
		 * scale down one step if the average number of packets
		 * per iteration is less than expected.
		 */
		if (rte_power_freq_down)
			rte_power_freq_down(lcore_id);
	}

	/**
	 * initialize another timer according to the current frequency to ensure
	 * the timer interval is relatively fixed.
468 */ 469 hz = rte_get_timer_hz(); 470 rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND, 471 SINGLE, lcore_id, power_timer_cb, NULL); 472 473 stats[lcore_id].nb_rx_processed = 0; 474 stats[lcore_id].nb_iteration_looped = 0; 475 476 stats[lcore_id].sleep_time = 0; 477 } 478 479 /* Enqueue a single packet, and send burst if queue is filled */ 480 static inline int 481 send_single_packet(struct rte_mbuf *m, uint16_t port) 482 { 483 uint32_t lcore_id; 484 struct lcore_conf *qconf; 485 486 lcore_id = rte_lcore_id(); 487 qconf = &lcore_conf[lcore_id]; 488 489 rte_eth_tx_buffer(port, qconf->tx_queue_id[port], 490 qconf->tx_buffer[port], m); 491 492 return 0; 493 } 494 495 #ifdef DO_RFC_1812_CHECKS 496 static inline int 497 is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len) 498 { 499 /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ 500 /* 501 * 1. The packet length reported by the Link Layer must be large 502 * enough to hold the minimum length legal IP datagram (20 bytes). 503 */ 504 if (link_len < sizeof(struct rte_ipv4_hdr)) 505 return -1; 506 507 /* 2. The IP checksum must be correct. */ 508 /* if this is not checked in H/W, check it. */ 509 if ((port_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM) == 0) { 510 uint16_t actual_cksum, expected_cksum; 511 actual_cksum = pkt->hdr_checksum; 512 pkt->hdr_checksum = 0; 513 expected_cksum = rte_ipv4_cksum(pkt); 514 if (actual_cksum != expected_cksum) 515 return -2; 516 } 517 518 /* 519 * 3. The IP version number must be 4. If the version number is not 4 520 * then the packet may be another version of IP, such as IPng or 521 * ST-II. 522 */ 523 if (((pkt->version_ihl) >> 4) != 4) 524 return -3; 525 /* 526 * 4. The IP header length field must be large enough to hold the 527 * minimum length legal IP datagram (20 bytes = 5 words). 528 */ 529 if ((pkt->version_ihl & 0xf) < 5) 530 return -4; 531 532 /* 533 * 5. The IP total length field must be large enough to hold the IP 534 * datagram header, whose length is specified in the IP header length 535 * field. 
536 */ 537 if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr)) 538 return -5; 539 540 return 0; 541 } 542 #endif 543 544 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 545 static void 546 print_ipv4_key(struct ipv4_5tuple key) 547 { 548 printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, " 549 "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src, 550 key.port_dst, key.port_src, key.proto); 551 } 552 static void 553 print_ipv6_key(struct ipv6_5tuple key) 554 { 555 printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", " 556 "port dst = %d, port src = %d, proto = %d\n", 557 IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src), 558 key.port_dst, key.port_src, key.proto); 559 } 560 561 static inline uint16_t 562 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 563 lookup_struct_t * ipv4_l3fwd_lookup_struct) 564 { 565 struct ipv4_5tuple key; 566 struct rte_tcp_hdr *tcp; 567 struct rte_udp_hdr *udp; 568 int ret = 0; 569 570 key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); 571 key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); 572 key.proto = ipv4_hdr->next_proto_id; 573 574 switch (ipv4_hdr->next_proto_id) { 575 case IPPROTO_TCP: 576 tcp = (struct rte_tcp_hdr *)((unsigned char *)ipv4_hdr + 577 sizeof(struct rte_ipv4_hdr)); 578 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 579 key.port_src = rte_be_to_cpu_16(tcp->src_port); 580 break; 581 582 case IPPROTO_UDP: 583 udp = (struct rte_udp_hdr *)((unsigned char *)ipv4_hdr + 584 sizeof(struct rte_ipv4_hdr)); 585 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 586 key.port_src = rte_be_to_cpu_16(udp->src_port); 587 break; 588 589 default: 590 key.port_dst = 0; 591 key.port_src = 0; 592 break; 593 } 594 595 /* Find destination port */ 596 ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); 597 return ((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]); 598 } 599 600 static inline uint16_t 601 get_ipv6_dst_port(struct rte_ipv6_hdr *ipv6_hdr, uint16_t portid, 602 lookup_struct_t *ipv6_l3fwd_lookup_struct) 603 { 604 struct ipv6_5tuple key; 605 struct rte_tcp_hdr *tcp; 606 struct rte_udp_hdr *udp; 607 int ret = 0; 608 609 memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); 610 memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); 611 612 key.proto = ipv6_hdr->proto; 613 614 switch (ipv6_hdr->proto) { 615 case IPPROTO_TCP: 616 tcp = (struct rte_tcp_hdr *)((unsigned char *) ipv6_hdr + 617 sizeof(struct rte_ipv6_hdr)); 618 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 619 key.port_src = rte_be_to_cpu_16(tcp->src_port); 620 break; 621 622 case IPPROTO_UDP: 623 udp = (struct rte_udp_hdr *)((unsigned char *) ipv6_hdr + 624 sizeof(struct rte_ipv6_hdr)); 625 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 626 key.port_src = rte_be_to_cpu_16(udp->src_port); 627 break; 628 629 default: 630 key.port_dst = 0; 631 key.port_src = 0; 632 break; 633 } 634 635 /* Find destination port */ 636 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 637 return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); 638 } 639 #endif 640 641 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 642 static inline uint16_t 643 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 644 lookup_struct_t *ipv4_l3fwd_lookup_struct) 645 { 646 uint32_t next_hop; 647 648 return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, 649 rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? 
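			/* on an LPM hit (lookup returns 0) use the matched next hop,
			 * otherwise send the packet back out the port it arrived on */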
650 next_hop : portid); 651 } 652 #endif 653 654 static inline void 655 parse_ptype_one(struct rte_mbuf *m) 656 { 657 struct rte_ether_hdr *eth_hdr; 658 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 659 uint16_t ether_type; 660 661 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 662 ether_type = eth_hdr->ether_type; 663 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) 664 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 665 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) 666 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 667 668 m->packet_type = packet_type; 669 } 670 671 static uint16_t 672 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused, 673 struct rte_mbuf *pkts[], uint16_t nb_pkts, 674 uint16_t max_pkts __rte_unused, 675 void *user_param __rte_unused) 676 { 677 unsigned int i; 678 679 for (i = 0; i < nb_pkts; ++i) 680 parse_ptype_one(pkts[i]); 681 682 return nb_pkts; 683 } 684 685 static int 686 add_cb_parse_ptype(uint16_t portid, uint16_t queueid) 687 { 688 printf("Port %d: softly parse packet type info\n", portid); 689 if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL)) 690 return 0; 691 692 printf("Failed to add rx callback: port=%d\n", portid); 693 return -1; 694 } 695 696 static inline void 697 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid, 698 struct lcore_conf *qconf) 699 { 700 struct rte_ether_hdr *eth_hdr; 701 struct rte_ipv4_hdr *ipv4_hdr; 702 void *d_addr_bytes; 703 uint16_t dst_port; 704 705 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 706 707 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 708 /* Handle IPv4 headers.*/ 709 ipv4_hdr = 710 rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 711 sizeof(struct rte_ether_hdr)); 712 713 #ifdef DO_RFC_1812_CHECKS 714 /* Check to make sure the packet is valid (RFC1812) */ 715 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { 716 rte_pktmbuf_free(m); 717 return; 718 } 719 #endif 720 721 dst_port = get_ipv4_dst_port(ipv4_hdr, portid, 722 qconf->ipv4_lookup_struct); 723 if (dst_port >= RTE_MAX_ETHPORTS || 724 (enabled_port_mask & 1 << dst_port) == 0) 725 dst_port = portid; 726 727 /* 02:00:00:00:00:xx */ 728 d_addr_bytes = ð_hdr->dst_addr.addr_bytes[0]; 729 *((uint64_t *)d_addr_bytes) = 730 0x000000000002 + ((uint64_t)dst_port << 40); 731 732 #ifdef DO_RFC_1812_CHECKS 733 /* Update time to live and header checksum */ 734 --(ipv4_hdr->time_to_live); 735 ++(ipv4_hdr->hdr_checksum); 736 #endif 737 738 /* src addr */ 739 rte_ether_addr_copy(&ports_eth_addr[dst_port], 740 ð_hdr->src_addr); 741 742 send_single_packet(m, dst_port); 743 } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { 744 /* Handle IPv6 headers.*/ 745 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 746 struct rte_ipv6_hdr *ipv6_hdr; 747 748 ipv6_hdr = 749 rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 750 sizeof(struct rte_ether_hdr)); 751 752 dst_port = get_ipv6_dst_port(ipv6_hdr, portid, 753 qconf->ipv6_lookup_struct); 754 755 if (dst_port >= RTE_MAX_ETHPORTS || 756 (enabled_port_mask & 1 << dst_port) == 0) 757 dst_port = portid; 758 759 /* 02:00:00:00:00:xx */ 760 d_addr_bytes = ð_hdr->dst_addr.addr_bytes[0]; 761 *((uint64_t *)d_addr_bytes) = 762 0x000000000002 + ((uint64_t)dst_port << 40); 763 764 /* src addr */ 765 rte_ether_addr_copy(&ports_eth_addr[dst_port], 766 ð_hdr->src_addr); 767 768 send_single_packet(m, dst_port); 769 #else 770 /* We don't currently handle IPv6 packets in LPM mode. 
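		 * Forwarding them would require an additional rte_lpm6 table
		 * alongside the IPv4 one.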
		 */
		rte_pktmbuf_free(m);
#endif
	} else
		rte_pktmbuf_free(m);

}

#define MINIMUM_SLEEP_TIME         1
#define SUSPEND_THRESHOLD          300

static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
	/* If the zero-packet poll count is still below SUSPEND_THRESHOLD,
	 * sleep for the minimum time (1 us).
	 */
	if (zero_rx_packet_count < SUSPEND_THRESHOLD)
		return MINIMUM_SLEEP_TIME;
	/* Otherwise sleep for SUSPEND_THRESHOLD us, long enough to cover
	 * the wakeup latency of deeper C-states (C3/C6).
	 */
	else
		return SUSPEND_THRESHOLD;
}

static inline enum freq_scale_hint_t
power_freq_scaleup_heuristic(unsigned lcore_id,
		uint16_t port_id,
		uint16_t queue_id)
{
	uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id);
/**
 * HW Rx queue size is 128 by default, Rx burst read at maximum 32 entries
 * per iteration
 */
#define FREQ_GEAR1_RX_PACKET_THRESHOLD             MAX_PKT_BURST
#define FREQ_GEAR2_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*2)
#define FREQ_GEAR3_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*3)
#define FREQ_UP_TREND1_ACC   1
#define FREQ_UP_TREND2_ACC   100
#define FREQ_UP_THRESHOLD    10000

	if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHEST;
	} else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD))
		stats[lcore_id].trend += FREQ_UP_TREND2_ACC;
	else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD))
		stats[lcore_id].trend += FREQ_UP_TREND1_ACC;

	if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHER;
	}

	return FREQ_CURRENT;
}

/**
 * Force the polling thread to sleep until a one-shot Rx interrupt triggers.
 * @param num
 *  Number of Rx queues (epoll events) to wait on.
 * @param lcore
 *  Lcore id of the calling thread.
 * @return
 *  0 on success
 */
static int
sleep_until_rx_interrupt(int num, int lcore)
{
	/*
	 * we want to track when we are woken up by traffic so that we can go
	 * back to sleep again without log spamming. Avoid cache line sharing
	 * to prevent threads stepping on each others' toes.
	 */
	static struct {
		bool wakeup;
	} __rte_cache_aligned status[RTE_MAX_LCORE];
	struct rte_epoll_event event[num];
	int n, i;
	uint16_t port_id;
	uint8_t queue_id;
	void *data;

	if (status[lcore].wakeup) {
		RTE_LOG(INFO, L3FWD_POWER,
				"lcore %u sleeps until interrupt triggers\n",
				rte_lcore_id());
	}

	n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, 10);
	for (i = 0; i < n; i++) {
		data = event[i].epdata.data;
		port_id = ((uintptr_t)data) >> CHAR_BIT;
		queue_id = ((uintptr_t)data) &
			RTE_LEN2MASK(CHAR_BIT, uint8_t);
		RTE_LOG(INFO, L3FWD_POWER,
			"lcore %u is woken up from rx interrupt on"
			" port %d queue %d\n",
			rte_lcore_id(), port_id, queue_id);
	}
	status[lcore].wakeup = n != 0;

	return 0;
}

static void turn_on_off_intr(struct lcore_conf *qconf, bool on)
{
	int i;
	struct lcore_rx_queue *rx_queue;
	uint8_t queue_id;
	uint16_t port_id;

	for (i = 0; i < qconf->n_rx_queue; ++i) {
		rx_queue = &(qconf->rx_queue_list[i]);
		port_id = rx_queue->port_id;
		queue_id = rx_queue->queue_id;

		rte_spinlock_lock(&(locks[port_id]));
		if (on)
			rte_eth_dev_rx_intr_enable(port_id, queue_id);
		else
			rte_eth_dev_rx_intr_disable(port_id, queue_id);
		rte_spinlock_unlock(&(locks[port_id]));
	}
}

static int event_register(struct lcore_conf *qconf)
{
	struct lcore_rx_queue *rx_queue;
	uint8_t queueid;
	uint16_t portid;
	uint32_t data;
	int ret;
	int i;

	for (i = 0; i < qconf->n_rx_queue; ++i) {
		rx_queue = &(qconf->rx_queue_list[i]);
		portid = rx_queue->port_id;
		queueid = rx_queue->queue_id;
		data = portid << CHAR_BIT | queueid;

		ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
				RTE_EPOLL_PER_THREAD,
				RTE_INTR_EVENT_ADD,
				(void *)((uintptr_t)data));
		if (ret)
			return ret;
	}

	return 0;
}

/* Main processing loop.
8< */ 923 static int main_intr_loop(__rte_unused void *dummy) 924 { 925 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 926 unsigned int lcore_id; 927 uint64_t prev_tsc, diff_tsc, cur_tsc; 928 int i, j, nb_rx; 929 uint8_t queueid; 930 uint16_t portid; 931 struct lcore_conf *qconf; 932 struct lcore_rx_queue *rx_queue; 933 uint32_t lcore_rx_idle_count = 0; 934 uint32_t lcore_idle_hint = 0; 935 int intr_en = 0; 936 937 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 938 US_PER_S * BURST_TX_DRAIN_US; 939 940 prev_tsc = 0; 941 942 lcore_id = rte_lcore_id(); 943 qconf = &lcore_conf[lcore_id]; 944 945 if (qconf->n_rx_queue == 0) { 946 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 947 lcore_id); 948 return 0; 949 } 950 951 RTE_LOG(INFO, L3FWD_POWER, "entering main interrupt loop on lcore %u\n", 952 lcore_id); 953 954 for (i = 0; i < qconf->n_rx_queue; i++) { 955 portid = qconf->rx_queue_list[i].port_id; 956 queueid = qconf->rx_queue_list[i].queue_id; 957 RTE_LOG(INFO, L3FWD_POWER, 958 " -- lcoreid=%u portid=%u rxqueueid=%hhu\n", 959 lcore_id, portid, queueid); 960 } 961 962 /* add into event wait list */ 963 if (event_register(qconf) == 0) 964 intr_en = 1; 965 else 966 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 967 968 while (!is_done()) { 969 stats[lcore_id].nb_iteration_looped++; 970 971 cur_tsc = rte_rdtsc(); 972 973 /* 974 * TX burst queue drain 975 */ 976 diff_tsc = cur_tsc - prev_tsc; 977 if (unlikely(diff_tsc > drain_tsc)) { 978 for (i = 0; i < qconf->n_tx_port; ++i) { 979 portid = qconf->tx_port_id[i]; 980 rte_eth_tx_buffer_flush(portid, 981 qconf->tx_queue_id[portid], 982 qconf->tx_buffer[portid]); 983 } 984 prev_tsc = cur_tsc; 985 } 986 987 start_rx: 988 /* 989 * Read packet from RX queues 990 */ 991 lcore_rx_idle_count = 0; 992 for (i = 0; i < qconf->n_rx_queue; ++i) { 993 rx_queue = &(qconf->rx_queue_list[i]); 994 rx_queue->idle_hint = 0; 995 portid = rx_queue->port_id; 996 queueid = rx_queue->queue_id; 997 998 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 999 MAX_PKT_BURST); 1000 1001 stats[lcore_id].nb_rx_processed += nb_rx; 1002 if (unlikely(nb_rx == 0)) { 1003 /** 1004 * no packet received from rx queue, try to 1005 * sleep for a while forcing CPU enter deeper 1006 * C states. 1007 */ 1008 rx_queue->zero_rx_packet_count++; 1009 1010 if (rx_queue->zero_rx_packet_count <= 1011 MIN_ZERO_POLL_COUNT) 1012 continue; 1013 1014 rx_queue->idle_hint = power_idle_heuristic( 1015 rx_queue->zero_rx_packet_count); 1016 lcore_rx_idle_count++; 1017 } else { 1018 rx_queue->zero_rx_packet_count = 0; 1019 } 1020 1021 /* Prefetch first packets */ 1022 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1023 rte_prefetch0(rte_pktmbuf_mtod( 1024 pkts_burst[j], void *)); 1025 } 1026 1027 /* Prefetch and forward already prefetched packets */ 1028 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1029 rte_prefetch0(rte_pktmbuf_mtod( 1030 pkts_burst[j + PREFETCH_OFFSET], 1031 void *)); 1032 l3fwd_simple_forward( 1033 pkts_burst[j], portid, qconf); 1034 } 1035 1036 /* Forward remaining prefetched packets */ 1037 for (; j < nb_rx; j++) { 1038 l3fwd_simple_forward( 1039 pkts_burst[j], portid, qconf); 1040 } 1041 } 1042 1043 if (unlikely(lcore_rx_idle_count == qconf->n_rx_queue)) { 1044 /** 1045 * All Rx queues empty in recent consecutive polls, 1046 * sleep in a conservative manner, meaning sleep as 1047 * less as possible. 
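			 * Conservative here means the lcore sleeps only for the
			 * smallest idle hint reported by any of its Rx queues.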
1048 */ 1049 for (i = 1, 1050 lcore_idle_hint = qconf->rx_queue_list[0].idle_hint; 1051 i < qconf->n_rx_queue; ++i) { 1052 rx_queue = &(qconf->rx_queue_list[i]); 1053 if (rx_queue->idle_hint < lcore_idle_hint) 1054 lcore_idle_hint = rx_queue->idle_hint; 1055 } 1056 1057 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1058 /** 1059 * execute "pause" instruction to avoid context 1060 * switch which generally take hundred of 1061 * microseconds for short sleep. 1062 */ 1063 rte_delay_us(lcore_idle_hint); 1064 else { 1065 /* suspend until rx interrupt triggers */ 1066 if (intr_en) { 1067 turn_on_off_intr(qconf, 1); 1068 sleep_until_rx_interrupt( 1069 qconf->n_rx_queue, 1070 lcore_id); 1071 turn_on_off_intr(qconf, 0); 1072 /** 1073 * start receiving packets immediately 1074 */ 1075 if (likely(!is_done())) 1076 goto start_rx; 1077 } 1078 } 1079 stats[lcore_id].sleep_time += lcore_idle_hint; 1080 } 1081 } 1082 1083 return 0; 1084 } 1085 /* >8 End of main processing loop. */ 1086 1087 /* main processing loop */ 1088 static int 1089 main_telemetry_loop(__rte_unused void *dummy) 1090 { 1091 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1092 unsigned int lcore_id; 1093 uint64_t prev_tsc, diff_tsc, cur_tsc, prev_tel_tsc; 1094 int i, j, nb_rx; 1095 uint8_t queueid; 1096 uint16_t portid; 1097 struct lcore_conf *qconf; 1098 struct lcore_rx_queue *rx_queue; 1099 uint64_t ep_nep[2] = {0}, fp_nfp[2] = {0}; 1100 uint64_t poll_count; 1101 enum busy_rate br; 1102 1103 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 1104 US_PER_S * BURST_TX_DRAIN_US; 1105 1106 poll_count = 0; 1107 prev_tsc = 0; 1108 prev_tel_tsc = 0; 1109 1110 lcore_id = rte_lcore_id(); 1111 qconf = &lcore_conf[lcore_id]; 1112 1113 if (qconf->n_rx_queue == 0) { 1114 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1115 lcore_id); 1116 return 0; 1117 } 1118 1119 RTE_LOG(INFO, L3FWD_POWER, "entering main telemetry loop on lcore %u\n", 1120 lcore_id); 1121 1122 for (i = 0; i < qconf->n_rx_queue; i++) { 1123 portid = qconf->rx_queue_list[i].port_id; 1124 queueid = qconf->rx_queue_list[i].queue_id; 1125 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1126 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1127 } 1128 1129 while (!is_done()) { 1130 1131 cur_tsc = rte_rdtsc(); 1132 /* 1133 * TX burst queue drain 1134 */ 1135 diff_tsc = cur_tsc - prev_tsc; 1136 if (unlikely(diff_tsc > drain_tsc)) { 1137 for (i = 0; i < qconf->n_tx_port; ++i) { 1138 portid = qconf->tx_port_id[i]; 1139 rte_eth_tx_buffer_flush(portid, 1140 qconf->tx_queue_id[portid], 1141 qconf->tx_buffer[portid]); 1142 } 1143 prev_tsc = cur_tsc; 1144 } 1145 1146 /* 1147 * Read packet from RX queues 1148 */ 1149 for (i = 0; i < qconf->n_rx_queue; ++i) { 1150 rx_queue = &(qconf->rx_queue_list[i]); 1151 portid = rx_queue->port_id; 1152 queueid = rx_queue->queue_id; 1153 1154 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1155 MAX_PKT_BURST); 1156 ep_nep[nb_rx == 0]++; 1157 fp_nfp[nb_rx == MAX_PKT_BURST]++; 1158 poll_count++; 1159 RTE_LOG(INFO, L3FWD_POWER, 1160 "lcore %u has woken up on port %d queue %d\n", 1161 rte_lcore_id(), portid, queueid); 1162 if (unlikely(nb_rx == 0)) 1163 continue; 1164 1165 /* Prefetch first packets */ 1166 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1167 rte_prefetch0(rte_pktmbuf_mtod( 1168 pkts_burst[j], void *)); 1169 } 1170 1171 /* Prefetch and forward already prefetched packets */ 1172 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1173 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1174 j + PREFETCH_OFFSET], void *)); 1175 
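				/* software pipelining: packet j + PREFETCH_OFFSET is
				 * prefetched while the already-prefetched packet j is
				 * forwarded */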
l3fwd_simple_forward(pkts_burst[j], portid, 1176 qconf); 1177 } 1178 1179 /* Forward remaining prefetched packets */ 1180 for (; j < nb_rx; j++) { 1181 l3fwd_simple_forward(pkts_burst[j], portid, 1182 qconf); 1183 } 1184 } 1185 if (unlikely(poll_count >= DEFAULT_COUNT)) { 1186 diff_tsc = cur_tsc - prev_tel_tsc; 1187 if (diff_tsc >= MAX_CYCLES) { 1188 br = FULL; 1189 } else if (diff_tsc > MIN_CYCLES && 1190 diff_tsc < MAX_CYCLES) { 1191 br = (diff_tsc * 100) / MAX_CYCLES; 1192 } else { 1193 br = ZERO; 1194 } 1195 poll_count = 0; 1196 prev_tel_tsc = cur_tsc; 1197 /* update stats for telemetry */ 1198 rte_spinlock_lock(&stats[lcore_id].telemetry_lock); 1199 stats[lcore_id].ep_nep[0] = ep_nep[0]; 1200 stats[lcore_id].ep_nep[1] = ep_nep[1]; 1201 stats[lcore_id].fp_nfp[0] = fp_nfp[0]; 1202 stats[lcore_id].fp_nfp[1] = fp_nfp[1]; 1203 stats[lcore_id].br = br; 1204 rte_spinlock_unlock(&stats[lcore_id].telemetry_lock); 1205 } 1206 } 1207 1208 return 0; 1209 } 1210 /* main processing loop */ 1211 static int 1212 main_empty_poll_loop(__rte_unused void *dummy) 1213 { 1214 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1215 unsigned int lcore_id; 1216 uint64_t prev_tsc, diff_tsc, cur_tsc; 1217 int i, j, nb_rx; 1218 uint8_t queueid; 1219 uint16_t portid; 1220 struct lcore_conf *qconf; 1221 struct lcore_rx_queue *rx_queue; 1222 1223 const uint64_t drain_tsc = 1224 (rte_get_tsc_hz() + US_PER_S - 1) / 1225 US_PER_S * BURST_TX_DRAIN_US; 1226 1227 prev_tsc = 0; 1228 1229 lcore_id = rte_lcore_id(); 1230 qconf = &lcore_conf[lcore_id]; 1231 1232 if (qconf->n_rx_queue == 0) { 1233 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1234 lcore_id); 1235 return 0; 1236 } 1237 1238 for (i = 0; i < qconf->n_rx_queue; i++) { 1239 portid = qconf->rx_queue_list[i].port_id; 1240 queueid = qconf->rx_queue_list[i].queue_id; 1241 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1242 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1243 } 1244 1245 while (!is_done()) { 1246 stats[lcore_id].nb_iteration_looped++; 1247 1248 cur_tsc = rte_rdtsc(); 1249 /* 1250 * TX burst queue drain 1251 */ 1252 diff_tsc = cur_tsc - prev_tsc; 1253 if (unlikely(diff_tsc > drain_tsc)) { 1254 for (i = 0; i < qconf->n_tx_port; ++i) { 1255 portid = qconf->tx_port_id[i]; 1256 rte_eth_tx_buffer_flush(portid, 1257 qconf->tx_queue_id[portid], 1258 qconf->tx_buffer[portid]); 1259 } 1260 prev_tsc = cur_tsc; 1261 } 1262 1263 /* 1264 * Read packet from RX queues 1265 */ 1266 for (i = 0; i < qconf->n_rx_queue; ++i) { 1267 rx_queue = &(qconf->rx_queue_list[i]); 1268 rx_queue->idle_hint = 0; 1269 portid = rx_queue->port_id; 1270 queueid = rx_queue->queue_id; 1271 1272 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1273 MAX_PKT_BURST); 1274 1275 stats[lcore_id].nb_rx_processed += nb_rx; 1276 1277 if (nb_rx == 0) { 1278 1279 rte_power_empty_poll_stat_update(lcore_id); 1280 1281 continue; 1282 } else { 1283 rte_power_poll_stat_update(lcore_id, nb_rx); 1284 } 1285 1286 1287 /* Prefetch first packets */ 1288 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1289 rte_prefetch0(rte_pktmbuf_mtod( 1290 pkts_burst[j], void *)); 1291 } 1292 1293 /* Prefetch and forward already prefetched packets */ 1294 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1295 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1296 j + PREFETCH_OFFSET], 1297 void *)); 1298 l3fwd_simple_forward(pkts_burst[j], portid, 1299 qconf); 1300 } 1301 1302 /* Forward remaining prefetched packets */ 1303 for (; j < nb_rx; j++) { 1304 l3fwd_simple_forward(pkts_burst[j], portid, 1305 qconf); 
1306 } 1307 1308 } 1309 1310 } 1311 1312 return 0; 1313 } 1314 /* main processing loop */ 1315 static int 1316 main_legacy_loop(__rte_unused void *dummy) 1317 { 1318 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1319 unsigned lcore_id; 1320 uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz; 1321 uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power; 1322 int i, j, nb_rx; 1323 uint8_t queueid; 1324 uint16_t portid; 1325 struct lcore_conf *qconf; 1326 struct lcore_rx_queue *rx_queue; 1327 enum freq_scale_hint_t lcore_scaleup_hint; 1328 uint32_t lcore_rx_idle_count = 0; 1329 uint32_t lcore_idle_hint = 0; 1330 int intr_en = 0; 1331 1332 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; 1333 1334 prev_tsc = 0; 1335 hz = rte_get_timer_hz(); 1336 tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND; 1337 1338 lcore_id = rte_lcore_id(); 1339 qconf = &lcore_conf[lcore_id]; 1340 1341 if (qconf->n_rx_queue == 0) { 1342 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); 1343 return 0; 1344 } 1345 1346 RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id); 1347 1348 for (i = 0; i < qconf->n_rx_queue; i++) { 1349 portid = qconf->rx_queue_list[i].port_id; 1350 queueid = qconf->rx_queue_list[i].queue_id; 1351 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1352 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1353 } 1354 1355 /* add into event wait list */ 1356 if (event_register(qconf) == 0) 1357 intr_en = 1; 1358 else 1359 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 1360 1361 while (!is_done()) { 1362 stats[lcore_id].nb_iteration_looped++; 1363 1364 cur_tsc = rte_rdtsc(); 1365 cur_tsc_power = cur_tsc; 1366 1367 /* 1368 * TX burst queue drain 1369 */ 1370 diff_tsc = cur_tsc - prev_tsc; 1371 if (unlikely(diff_tsc > drain_tsc)) { 1372 for (i = 0; i < qconf->n_tx_port; ++i) { 1373 portid = qconf->tx_port_id[i]; 1374 rte_eth_tx_buffer_flush(portid, 1375 qconf->tx_queue_id[portid], 1376 qconf->tx_buffer[portid]); 1377 } 1378 prev_tsc = cur_tsc; 1379 } 1380 1381 diff_tsc_power = cur_tsc_power - prev_tsc_power; 1382 if (diff_tsc_power > tim_res_tsc) { 1383 rte_timer_manage(); 1384 prev_tsc_power = cur_tsc_power; 1385 } 1386 1387 start_rx: 1388 /* 1389 * Read packet from RX queues 1390 */ 1391 lcore_scaleup_hint = FREQ_CURRENT; 1392 lcore_rx_idle_count = 0; 1393 for (i = 0; i < qconf->n_rx_queue; ++i) { 1394 rx_queue = &(qconf->rx_queue_list[i]); 1395 rx_queue->idle_hint = 0; 1396 portid = rx_queue->port_id; 1397 queueid = rx_queue->queue_id; 1398 1399 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1400 MAX_PKT_BURST); 1401 1402 stats[lcore_id].nb_rx_processed += nb_rx; 1403 if (unlikely(nb_rx == 0)) { 1404 /** 1405 * no packet received from rx queue, try to 1406 * sleep for a while forcing CPU enter deeper 1407 * C states. 1408 */ 1409 rx_queue->zero_rx_packet_count++; 1410 1411 if (rx_queue->zero_rx_packet_count <= 1412 MIN_ZERO_POLL_COUNT) 1413 continue; 1414 1415 rx_queue->idle_hint = power_idle_heuristic(\ 1416 rx_queue->zero_rx_packet_count); 1417 lcore_rx_idle_count++; 1418 } else { 1419 rx_queue->zero_rx_packet_count = 0; 1420 1421 /** 1422 * do not scale up frequency immediately as 1423 * user to kernel space communication is costly 1424 * which might impact packet I/O for received 1425 * packets. 
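				 * The hint is only recorded here; it is applied once
				 * per poll round, after all Rx queues have been drained.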
				 */
				rx_queue->freq_up_hint =
					power_freq_scaleup_heuristic(lcore_id,
							portid, queueid);
			}

			/* Prefetch first packets */
			for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
				rte_prefetch0(rte_pktmbuf_mtod(
						pkts_burst[j], void *));
			}

			/* Prefetch and forward already prefetched packets */
			for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
				rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
						j + PREFETCH_OFFSET], void *));
				l3fwd_simple_forward(pkts_burst[j], portid,
						qconf);
			}

			/* Forward remaining prefetched packets */
			for (; j < nb_rx; j++) {
				l3fwd_simple_forward(pkts_burst[j], portid,
						qconf);
			}
		}

		if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) {
			for (i = 1, lcore_scaleup_hint =
					qconf->rx_queue_list[0].freq_up_hint;
					i < qconf->n_rx_queue; ++i) {
				rx_queue = &(qconf->rx_queue_list[i]);
				if (rx_queue->freq_up_hint >
						lcore_scaleup_hint)
					lcore_scaleup_hint =
						rx_queue->freq_up_hint;
			}

			if (lcore_scaleup_hint == FREQ_HIGHEST) {
				if (rte_power_freq_max)
					rte_power_freq_max(lcore_id);
			} else if (lcore_scaleup_hint == FREQ_HIGHER) {
				if (rte_power_freq_up)
					rte_power_freq_up(lcore_id);
			}
		} else {
			/**
			 * All Rx queues empty in recent consecutive polls,
			 * sleep in a conservative manner, meaning sleep as
			 * little as possible.
			 */
			for (i = 1, lcore_idle_hint =
					qconf->rx_queue_list[0].idle_hint;
					i < qconf->n_rx_queue; ++i) {
				rx_queue = &(qconf->rx_queue_list[i]);
				if (rx_queue->idle_hint < lcore_idle_hint)
					lcore_idle_hint = rx_queue->idle_hint;
			}

			if (lcore_idle_hint < SUSPEND_THRESHOLD)
				/**
				 * execute "pause" instruction to avoid a
				 * context switch, which generally takes
				 * hundreds of microseconds for such a
				 * short sleep.
				 */
				rte_delay_us(lcore_idle_hint);
			else {
				/* suspend until rx interrupt triggers */
				if (intr_en) {
					turn_on_off_intr(qconf, 1);
					sleep_until_rx_interrupt(
							qconf->n_rx_queue,
							lcore_id);
					turn_on_off_intr(qconf, 0);
					/**
					 * start receiving packets immediately
					 */
					if (likely(!is_done()))
						goto start_rx;
				}
			}
			stats[lcore_id].sleep_time += lcore_idle_hint;
		}
	}

	return 0;
}

static int
check_lcore_params(void)
{
	uint8_t queue, lcore;
	uint16_t i;
	int socketid;

	for (i = 0; i < nb_lcore_params; ++i) {
		queue = lcore_params[i].queue_id;
		if (queue >= MAX_RX_QUEUE_PER_PORT) {
			printf("invalid queue number: %hhu\n", queue);
			return -1;
		}
		lcore = lcore_params[i].lcore_id;
		if (!rte_lcore_is_enabled(lcore)) {
			printf("error: lcore %hhu is not enabled in lcore "
							"mask\n", lcore);
			return -1;
		}
		if ((socketid = rte_lcore_to_socket_id(lcore)) != 0 &&
				(numa_on == 0)) {
			printf("warning: lcore %hhu is on socket %d with numa "
						"off\n", lcore, socketid);
		}
		if (app_mode == APP_MODE_TELEMETRY && lcore == rte_lcore_id()) {
			printf("cannot enable main core %d in config for telemetry mode\n",
				rte_lcore_id());
			return -1;
		}
	}
	return 0;
}

static int
check_port_config(void)
{
	unsigned portid;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		portid = lcore_params[i].port_id;
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("port %u is not enabled in port mask\n",
								portid);
			return -1;
		}
		if (!rte_eth_dev_is_valid_port(portid)) {
			printf("port %u is not present on the board\n",
								portid);
			return -1;
		}
	}
	return 0;
}

static uint8_t
get_port_n_rx_queues(const uint16_t port)
{
	int queue = -1;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		if (lcore_params[i].port_id == port &&
				lcore_params[i].queue_id > queue)
			queue = lcore_params[i].queue_id;
	}
	return (uint8_t)(++queue);
}

static int
init_lcore_rx_queues(void)
{
	uint16_t i, nb_rx_queue;
	uint8_t lcore;

	for (i = 0; i < nb_lcore_params; ++i) {
		lcore = lcore_params[i].lcore_id;
		nb_rx_queue = lcore_conf[lcore].n_rx_queue;
		if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
			printf("error: too many queues (%u) for lcore: %u\n",
				(unsigned)nb_rx_queue + 1, (unsigned)lcore);
			return -1;
		} else {
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
				lcore_params[i].port_id;
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
				lcore_params[i].queue_id;
			lcore_conf[lcore].n_rx_queue++;
		}
	}
	return 0;
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf ("%s [EAL options] -- -p PORTMASK -P"
		"  [--config (port,queue,lcore)[,(port,queue,lcore]]"
		"  [--high-perf-cores CORELIST"
		"  [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index]]"
		"  [--max-pkt-len PKTLEN]\n"
		"  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
		"  -P: enable promiscuous mode\n"
		"  --config (port,queue,lcore): rx queues configuration\n"
		"  --high-perf-cores CORELIST: list of high performance cores\n"
		"  --perf-config: similar to --config, cores 
specified as indices" 1621 " for bins containing high or regular performance cores\n" 1622 " --no-numa: optional, disable numa awareness\n" 1623 " --max-pkt-len PKTLEN: maximum packet length in decimal (64-9600)\n" 1624 " --parse-ptype: parse packet type by software\n" 1625 " --legacy: use legacy interrupt-based scaling\n" 1626 " --empty-poll: enable empty poll detection" 1627 " follow (training_flag, high_threshold, med_threshold)\n" 1628 " --telemetry: enable telemetry mode, to update" 1629 " empty polls, full polls, and core busyness to telemetry\n" 1630 " --interrupt-only: enable interrupt-only mode\n" 1631 " --pmd-mgmt MODE: enable PMD power management mode. " 1632 "Currently supported modes: baseline, monitor, pause, scale\n", 1633 prgname); 1634 } 1635 1636 static int parse_max_pkt_len(const char *pktlen) 1637 { 1638 char *end = NULL; 1639 unsigned long len; 1640 1641 /* parse decimal string */ 1642 len = strtoul(pktlen, &end, 10); 1643 if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0')) 1644 return -1; 1645 1646 if (len == 0) 1647 return -1; 1648 1649 return len; 1650 } 1651 1652 static int 1653 parse_portmask(const char *portmask) 1654 { 1655 char *end = NULL; 1656 unsigned long pm; 1657 1658 /* parse hexadecimal string */ 1659 pm = strtoul(portmask, &end, 16); 1660 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 1661 return 0; 1662 1663 return pm; 1664 } 1665 1666 static int 1667 parse_config(const char *q_arg) 1668 { 1669 char s[256]; 1670 const char *p, *p0 = q_arg; 1671 char *end; 1672 enum fieldnames { 1673 FLD_PORT = 0, 1674 FLD_QUEUE, 1675 FLD_LCORE, 1676 _NUM_FLD 1677 }; 1678 unsigned long int_fld[_NUM_FLD]; 1679 char *str_fld[_NUM_FLD]; 1680 int i; 1681 unsigned size; 1682 1683 nb_lcore_params = 0; 1684 1685 while ((p = strchr(p0,'(')) != NULL) { 1686 ++p; 1687 if((p0 = strchr(p,')')) == NULL) 1688 return -1; 1689 1690 size = p0 - p; 1691 if(size >= sizeof(s)) 1692 return -1; 1693 1694 snprintf(s, sizeof(s), "%.*s", size, p); 1695 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != 1696 _NUM_FLD) 1697 return -1; 1698 for (i = 0; i < _NUM_FLD; i++){ 1699 errno = 0; 1700 int_fld[i] = strtoul(str_fld[i], &end, 0); 1701 if (errno != 0 || end == str_fld[i] || int_fld[i] > 1702 255) 1703 return -1; 1704 } 1705 if (nb_lcore_params >= MAX_LCORE_PARAMS) { 1706 printf("exceeded max number of lcore params: %hu\n", 1707 nb_lcore_params); 1708 return -1; 1709 } 1710 lcore_params_array[nb_lcore_params].port_id = 1711 (uint8_t)int_fld[FLD_PORT]; 1712 lcore_params_array[nb_lcore_params].queue_id = 1713 (uint8_t)int_fld[FLD_QUEUE]; 1714 lcore_params_array[nb_lcore_params].lcore_id = 1715 (uint8_t)int_fld[FLD_LCORE]; 1716 ++nb_lcore_params; 1717 } 1718 lcore_params = lcore_params_array; 1719 1720 return 0; 1721 } 1722 1723 static int 1724 parse_pmd_mgmt_config(const char *name) 1725 { 1726 #define PMD_MGMT_MONITOR "monitor" 1727 #define PMD_MGMT_PAUSE "pause" 1728 #define PMD_MGMT_SCALE "scale" 1729 #define PMD_MGMT_BASELINE "baseline" 1730 1731 if (strncmp(PMD_MGMT_MONITOR, name, sizeof(PMD_MGMT_MONITOR)) == 0) { 1732 pmgmt_type = RTE_POWER_MGMT_TYPE_MONITOR; 1733 return 0; 1734 } 1735 1736 if (strncmp(PMD_MGMT_PAUSE, name, sizeof(PMD_MGMT_PAUSE)) == 0) { 1737 pmgmt_type = RTE_POWER_MGMT_TYPE_PAUSE; 1738 return 0; 1739 } 1740 1741 if (strncmp(PMD_MGMT_SCALE, name, sizeof(PMD_MGMT_SCALE)) == 0) { 1742 pmgmt_type = RTE_POWER_MGMT_TYPE_SCALE; 1743 return 0; 1744 } 1745 if (strncmp(PMD_MGMT_BASELINE, name, sizeof(PMD_MGMT_BASELINE)) == 0) { 1746 baseline_enabled = 
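		/* baseline: no power-saving scheme is enabled for the queues;
		 * intended as a reference run for comparing the monitor, pause
		 * and scale pmd-mgmt modes */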
true; 1747 return 0; 1748 } 1749 /* unknown PMD power management mode */ 1750 return -1; 1751 } 1752 1753 static int 1754 parse_ep_config(const char *q_arg) 1755 { 1756 char s[256]; 1757 const char *p = q_arg; 1758 char *end; 1759 int num_arg; 1760 1761 char *str_fld[3]; 1762 1763 int training_flag; 1764 int med_edpi; 1765 int hgh_edpi; 1766 1767 ep_med_edpi = EMPTY_POLL_MED_THRESHOLD; 1768 ep_hgh_edpi = EMPTY_POLL_HGH_THRESHOLD; 1769 1770 strlcpy(s, p, sizeof(s)); 1771 1772 num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ','); 1773 1774 empty_poll_train = false; 1775 1776 if (num_arg == 0) 1777 return 0; 1778 1779 if (num_arg == 3) { 1780 1781 training_flag = strtoul(str_fld[0], &end, 0); 1782 med_edpi = strtoul(str_fld[1], &end, 0); 1783 hgh_edpi = strtoul(str_fld[2], &end, 0); 1784 1785 if (training_flag == 1) 1786 empty_poll_train = true; 1787 1788 if (med_edpi > 0) 1789 ep_med_edpi = med_edpi; 1790 1791 if (hgh_edpi > 0) 1792 ep_hgh_edpi = hgh_edpi; 1793 1794 } else { 1795 1796 return -1; 1797 } 1798 1799 return 0; 1800 1801 } 1802 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" 1803 #define CMD_LINE_OPT_LEGACY "legacy" 1804 #define CMD_LINE_OPT_EMPTY_POLL "empty-poll" 1805 #define CMD_LINE_OPT_INTERRUPT_ONLY "interrupt-only" 1806 #define CMD_LINE_OPT_TELEMETRY "telemetry" 1807 #define CMD_LINE_OPT_PMD_MGMT "pmd-mgmt" 1808 #define CMD_LINE_OPT_MAX_PKT_LEN "max-pkt-len" 1809 1810 /* Parse the argument given in the command line of the application */ 1811 static int 1812 parse_args(int argc, char **argv) 1813 { 1814 int opt, ret; 1815 char **argvopt; 1816 int option_index; 1817 uint32_t limit; 1818 char *prgname = argv[0]; 1819 static struct option lgopts[] = { 1820 {"config", 1, 0, 0}, 1821 {"perf-config", 1, 0, 0}, 1822 {"high-perf-cores", 1, 0, 0}, 1823 {"no-numa", 0, 0, 0}, 1824 {CMD_LINE_OPT_MAX_PKT_LEN, 1, 0, 0}, 1825 {CMD_LINE_OPT_EMPTY_POLL, 1, 0, 0}, 1826 {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, 1827 {CMD_LINE_OPT_LEGACY, 0, 0, 0}, 1828 {CMD_LINE_OPT_TELEMETRY, 0, 0, 0}, 1829 {CMD_LINE_OPT_INTERRUPT_ONLY, 0, 0, 0}, 1830 {CMD_LINE_OPT_PMD_MGMT, 1, 0, 0}, 1831 {NULL, 0, 0, 0} 1832 }; 1833 1834 argvopt = argv; 1835 1836 while ((opt = getopt_long(argc, argvopt, "p:l:m:h:P", 1837 lgopts, &option_index)) != EOF) { 1838 1839 switch (opt) { 1840 /* portmask */ 1841 case 'p': 1842 enabled_port_mask = parse_portmask(optarg); 1843 if (enabled_port_mask == 0) { 1844 printf("invalid portmask\n"); 1845 print_usage(prgname); 1846 return -1; 1847 } 1848 break; 1849 case 'P': 1850 printf("Promiscuous mode selected\n"); 1851 promiscuous_on = 1; 1852 break; 1853 case 'l': 1854 limit = parse_max_pkt_len(optarg); 1855 freq_tlb[LOW] = limit; 1856 break; 1857 case 'm': 1858 limit = parse_max_pkt_len(optarg); 1859 freq_tlb[MED] = limit; 1860 break; 1861 case 'h': 1862 limit = parse_max_pkt_len(optarg); 1863 freq_tlb[HGH] = limit; 1864 break; 1865 /* long options */ 1866 case 0: 1867 if (!strncmp(lgopts[option_index].name, "config", 6)) { 1868 ret = parse_config(optarg); 1869 if (ret) { 1870 printf("invalid config\n"); 1871 print_usage(prgname); 1872 return -1; 1873 } 1874 } 1875 1876 if (!strncmp(lgopts[option_index].name, 1877 "perf-config", 11)) { 1878 ret = parse_perf_config(optarg); 1879 if (ret) { 1880 printf("invalid perf-config\n"); 1881 print_usage(prgname); 1882 return -1; 1883 } 1884 } 1885 1886 if (!strncmp(lgopts[option_index].name, 1887 "high-perf-cores", 15)) { 1888 ret = parse_perf_core_list(optarg); 1889 if (ret) { 1890 printf("invalid high-perf-cores\n"); 1891 print_usage(prgname); 1892 
return -1; 1893 } 1894 } 1895 1896 if (!strncmp(lgopts[option_index].name, 1897 "no-numa", 7)) { 1898 printf("numa is disabled \n"); 1899 numa_on = 0; 1900 } 1901 1902 if (!strncmp(lgopts[option_index].name, 1903 CMD_LINE_OPT_LEGACY, 1904 sizeof(CMD_LINE_OPT_LEGACY))) { 1905 if (app_mode != APP_MODE_DEFAULT) { 1906 printf(" legacy mode is mutually exclusive with other modes\n"); 1907 return -1; 1908 } 1909 app_mode = APP_MODE_LEGACY; 1910 printf("legacy mode is enabled\n"); 1911 } 1912 1913 if (!strncmp(lgopts[option_index].name, 1914 CMD_LINE_OPT_EMPTY_POLL, 10)) { 1915 if (app_mode != APP_MODE_DEFAULT) { 1916 printf(" empty-poll mode is mutually exclusive with other modes\n"); 1917 return -1; 1918 } 1919 app_mode = APP_MODE_EMPTY_POLL; 1920 ret = parse_ep_config(optarg); 1921 1922 if (ret) { 1923 printf("invalid empty poll config\n"); 1924 print_usage(prgname); 1925 return -1; 1926 } 1927 printf("empty-poll is enabled\n"); 1928 } 1929 1930 if (!strncmp(lgopts[option_index].name, 1931 CMD_LINE_OPT_TELEMETRY, 1932 sizeof(CMD_LINE_OPT_TELEMETRY))) { 1933 if (app_mode != APP_MODE_DEFAULT) { 1934 printf(" telemetry mode is mutually exclusive with other modes\n"); 1935 return -1; 1936 } 1937 app_mode = APP_MODE_TELEMETRY; 1938 printf("telemetry mode is enabled\n"); 1939 } 1940 1941 if (!strncmp(lgopts[option_index].name, 1942 CMD_LINE_OPT_PMD_MGMT, 1943 sizeof(CMD_LINE_OPT_PMD_MGMT))) { 1944 if (app_mode != APP_MODE_DEFAULT) { 1945 printf(" power mgmt mode is mutually exclusive with other modes\n"); 1946 return -1; 1947 } 1948 if (parse_pmd_mgmt_config(optarg) < 0) { 1949 printf(" Invalid PMD power management mode: %s\n", 1950 optarg); 1951 return -1; 1952 } 1953 app_mode = APP_MODE_PMD_MGMT; 1954 printf("PMD power mgmt mode is enabled\n"); 1955 } 1956 if (!strncmp(lgopts[option_index].name, 1957 CMD_LINE_OPT_INTERRUPT_ONLY, 1958 sizeof(CMD_LINE_OPT_INTERRUPT_ONLY))) { 1959 if (app_mode != APP_MODE_DEFAULT) { 1960 printf(" interrupt-only mode is mutually exclusive with other modes\n"); 1961 return -1; 1962 } 1963 app_mode = APP_MODE_INTERRUPT; 1964 printf("interrupt-only mode is enabled\n"); 1965 } 1966 1967 if (!strncmp(lgopts[option_index].name, 1968 CMD_LINE_OPT_MAX_PKT_LEN, 1969 sizeof(CMD_LINE_OPT_MAX_PKT_LEN))) { 1970 printf("Custom frame size is configured\n"); 1971 max_pkt_len = parse_max_pkt_len(optarg); 1972 } 1973 1974 if (!strncmp(lgopts[option_index].name, 1975 CMD_LINE_OPT_PARSE_PTYPE, 1976 sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { 1977 printf("soft parse-ptype is enabled\n"); 1978 parse_ptype = 1; 1979 } 1980 1981 break; 1982 1983 default: 1984 print_usage(prgname); 1985 return -1; 1986 } 1987 } 1988 1989 if (optind >= 0) 1990 argv[optind-1] = prgname; 1991 1992 ret = optind-1; 1993 optind = 1; /* reset getopt lib */ 1994 return ret; 1995 } 1996 1997 static void 1998 print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr) 1999 { 2000 char buf[RTE_ETHER_ADDR_FMT_SIZE]; 2001 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr); 2002 printf("%s%s", name, buf); 2003 } 2004 2005 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2006 static void 2007 setup_hash(int socketid) 2008 { 2009 struct rte_hash_parameters ipv4_l3fwd_hash_params = { 2010 .name = NULL, 2011 .entries = L3FWD_HASH_ENTRIES, 2012 .key_len = sizeof(struct ipv4_5tuple), 2013 .hash_func = DEFAULT_HASH_FUNC, 2014 .hash_func_init_val = 0, 2015 }; 2016 2017 struct rte_hash_parameters ipv6_l3fwd_hash_params = { 2018 .name = NULL, 2019 .entries = L3FWD_HASH_ENTRIES, 2020 .key_len = sizeof(struct 
ipv6_5tuple), 2021 .hash_func = DEFAULT_HASH_FUNC, 2022 .hash_func_init_val = 0, 2023 }; 2024 2025 unsigned i; 2026 int ret; 2027 char s[64]; 2028 2029 /* create ipv4 hash */ 2030 snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); 2031 ipv4_l3fwd_hash_params.name = s; 2032 ipv4_l3fwd_hash_params.socket_id = socketid; 2033 ipv4_l3fwd_lookup_struct[socketid] = 2034 rte_hash_create(&ipv4_l3fwd_hash_params); 2035 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 2036 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 2037 "socket %d\n", socketid); 2038 2039 /* create ipv6 hash */ 2040 snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); 2041 ipv6_l3fwd_hash_params.name = s; 2042 ipv6_l3fwd_hash_params.socket_id = socketid; 2043 ipv6_l3fwd_lookup_struct[socketid] = 2044 rte_hash_create(&ipv6_l3fwd_hash_params); 2045 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 2046 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 2047 "socket %d\n", socketid); 2048 2049 2050 /* populate the ipv4 hash */ 2051 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 2052 ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid], 2053 (void *) &ipv4_l3fwd_route_array[i].key); 2054 if (ret < 0) { 2055 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 2056 "l3fwd hash on socket %d\n", i, socketid); 2057 } 2058 ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out; 2059 printf("Hash: Adding key\n"); 2060 print_ipv4_key(ipv4_l3fwd_route_array[i].key); 2061 } 2062 2063 /* populate the ipv6 hash */ 2064 for (i = 0; i < RTE_DIM(ipv6_l3fwd_route_array); i++) { 2065 ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid], 2066 (void *) &ipv6_l3fwd_route_array[i].key); 2067 if (ret < 0) { 2068 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 2069 "l3fwd hash on socket %d\n", i, socketid); 2070 } 2071 ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out; 2072 printf("Hash: Adding key\n"); 2073 print_ipv6_key(ipv6_l3fwd_route_array[i].key); 2074 } 2075 } 2076 #endif 2077 2078 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 2079 static void 2080 setup_lpm(int socketid) 2081 { 2082 unsigned i; 2083 int ret; 2084 char s[64]; 2085 2086 /* create the LPM table */ 2087 struct rte_lpm_config lpm_ipv4_config; 2088 2089 lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; 2090 lpm_ipv4_config.number_tbl8s = 256; 2091 lpm_ipv4_config.flags = 0; 2092 2093 snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); 2094 ipv4_l3fwd_lookup_struct[socketid] = 2095 rte_lpm_create(s, socketid, &lpm_ipv4_config); 2096 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 2097 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 2098 " on socket %d\n", socketid); 2099 2100 /* populate the LPM table */ 2101 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 2102 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], 2103 ipv4_l3fwd_route_array[i].ip, 2104 ipv4_l3fwd_route_array[i].depth, 2105 ipv4_l3fwd_route_array[i].if_out); 2106 2107 if (ret < 0) { 2108 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 2109 "l3fwd LPM table on socket %d\n", 2110 i, socketid); 2111 } 2112 2113 printf("LPM: Adding route 0x%08x / %d (%d)\n", 2114 (unsigned)ipv4_l3fwd_route_array[i].ip, 2115 ipv4_l3fwd_route_array[i].depth, 2116 ipv4_l3fwd_route_array[i].if_out); 2117 } 2118 } 2119 #endif 2120 2121 static int 2122 init_mem(unsigned nb_mbuf) 2123 { 2124 struct lcore_conf *qconf; 2125 int socketid; 2126 unsigned lcore_id; 2127 char s[64]; 2128 2129 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 
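		/* one mbuf pool and one lookup table are set up per NUMA socket
		 * (or a single set on socket 0 with --no-numa); lcores on the
		 * same socket share them */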
static int
init_mem(unsigned nb_mbuf)
{
	struct lcore_conf *qconf;
	int socketid;
	unsigned lcore_id;
	char s[64];

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		if (numa_on)
			socketid = rte_lcore_to_socket_id(lcore_id);
		else
			socketid = 0;

		if (socketid >= NB_SOCKETS) {
			rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is "
					"out of range %d\n", socketid,
					lcore_id, NB_SOCKETS);
		}
		if (pktmbuf_pool[socketid] == NULL) {
			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
			pktmbuf_pool[socketid] =
				rte_pktmbuf_pool_create(s, nb_mbuf,
					MEMPOOL_CACHE_SIZE, 0,
					RTE_MBUF_DEFAULT_BUF_SIZE,
					socketid);
			if (pktmbuf_pool[socketid] == NULL)
				rte_exit(EXIT_FAILURE,
					"Cannot init mbuf pool on socket %d\n",
					socketid);
			else
				printf("Allocated mbuf pool on socket %d\n",
					socketid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
			setup_lpm(socketid);
#else
			setup_hash(socketid);
#endif
		}
		qconf = &lcore_conf[lcore_id];
		qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
#endif
	}
	return 0;
}

/* Check the link status of all enabled ports for up to 9 s, then print it */
static void
check_all_ports_link_status(uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
	uint8_t count, all_ports_up, print_flag = 0;
	uint16_t portid;
	struct rte_eth_link link;
	int ret;
	char link_status_text[RTE_ETH_LINK_MAX_STR_LEN];

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		RTE_ETH_FOREACH_DEV(portid) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			ret = rte_eth_link_get_nowait(portid, &link);
			if (ret < 0) {
				all_ports_up = 0;
				if (print_flag == 1)
					printf("Port %u link get failed: %s\n",
						portid, rte_strerror(-ret));
				continue;
			}
			/* print link status if flag set */
			if (print_flag == 1) {
				rte_eth_link_to_str(link_status_text,
					sizeof(link_status_text), &link);
				printf("Port %d %s\n", portid,
					link_status_text);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == RTE_ETH_LINK_DOWN) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}
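/*
 * check_ptype() below verifies that the PMD can classify the L3 packet types
 * this application relies on (IPv4, plus IPv6 for exact-match builds). It is
 * only consulted when soft ptype parsing (parse_ptype) is disabled; otherwise
 * an rx callback fills in the packet type in software instead.
 */
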
static int check_ptype(uint16_t portid)
{
	int i, ret;
	int ptype_l3_ipv4 = 0;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	int ptype_l3_ipv6 = 0;
#endif
	uint32_t ptype_mask = RTE_PTYPE_L3_MASK;

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
	if (ret <= 0)
		return 0;

	uint32_t ptypes[ret];

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
	for (i = 0; i < ret; ++i) {
		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
			ptype_l3_ipv4 = 1;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
			ptype_l3_ipv6 = 1;
#endif
	}

	if (ptype_l3_ipv4 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	if (ptype_l3_ipv6 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
	if (ptype_l3_ipv4)
#else /* APP_LOOKUP_EXACT_MATCH */
	if (ptype_l3_ipv4 && ptype_l3_ipv6)
#endif
		return 1;

	return 0;
}

static int
init_power_library(void)
{
	enum power_management_env env;
	unsigned int lcore_id;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* init power management library */
		ret = rte_power_init(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library initialization failed on core %u\n",
				lcore_id);
			return ret;
		}
		/* we're not supporting the VM channel mode */
		env = rte_power_get_env();
		if (env != PM_ENV_ACPI_CPUFREQ &&
				env != PM_ENV_PSTATE_CPUFREQ) {
			RTE_LOG(ERR, POWER,
				"Only ACPI and PSTATE modes are supported\n");
			return -1;
		}
	}
	return ret;
}

static int
deinit_power_library(void)
{
	unsigned int lcore_id;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* deinit power management library */
		ret = rte_power_exit(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library deinitialization failed on core %u\n",
				lcore_id);
			return ret;
		}
	}
	return ret;
}

static void
get_current_stat_values(uint64_t *values)
{
	unsigned int lcore_id = rte_lcore_id();
	struct lcore_conf *qconf;
	uint64_t app_eps = 0, app_fps = 0, app_br = 0;
	uint64_t count = 0;

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		qconf = &lcore_conf[lcore_id];
		if (qconf->n_rx_queue == 0)
			continue;
		count++;
		rte_spinlock_lock(&stats[lcore_id].telemetry_lock);
		app_eps += stats[lcore_id].ep_nep[1];
		app_fps += stats[lcore_id].fp_nfp[1];
		app_br += stats[lcore_id].br;
		rte_spinlock_unlock(&stats[lcore_id].telemetry_lock);
	}

	if (count > 0) {
		values[0] = app_eps/count;
		values[1] = app_fps/count;
		values[2] = app_br/count;
	} else
		memset(values, 0, sizeof(uint64_t) * NUM_TELSTATS);
}

static void
update_telemetry(__rte_unused struct rte_timer *tim,
		__rte_unused void *arg)
{
	int ret;
	uint64_t values[NUM_TELSTATS] = {0};

	get_current_stat_values(values);
	ret = rte_metrics_update_values(RTE_METRICS_GLOBAL, telstats_index,
					values, RTE_DIM(values));
	if (ret < 0)
		RTE_LOG(WARNING, POWER, "failed to update metrics\n");
}

static int
handle_app_stats(const char *cmd __rte_unused,
		const char *params __rte_unused,
		struct rte_tel_data *d)
{
	uint64_t values[NUM_TELSTATS] = {0};
	uint32_t i;

	rte_tel_data_start_dict(d);
	get_current_stat_values(values);
	for (i = 0; i < NUM_TELSTATS; i++)
		rte_tel_data_add_dict_u64(d, telstats_strings[i].name,
				values[i]);
	return 0;
}
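/*
 * The telemetry timer below fires TELEMETRY_INTERVALS_PER_SEC times per
 * second and pushes the averaged per-lcore empty/full poll counters into the
 * metrics library through update_telemetry() above.
 */
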
static void
telemetry_setup_timer(void)
{
	int lcore_id = rte_lcore_id();
	uint64_t hz = rte_get_timer_hz();
	uint64_t ticks;

	ticks = hz / TELEMETRY_INTERVALS_PER_SEC;
	rte_timer_reset_sync(&telemetry_timer,
			ticks,
			PERIODICAL,
			lcore_id,
			update_telemetry,
			NULL);
}

static void
empty_poll_setup_timer(void)
{
	int lcore_id = rte_lcore_id();
	uint64_t hz = rte_get_timer_hz();

	struct ep_params *ep_ptr = ep_params;

	ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND;

	rte_timer_reset_sync(&ep_ptr->timer0,
			ep_ptr->interval_ticks,
			PERIODICAL,
			lcore_id,
			rte_empty_poll_detection,
			(void *)ep_ptr);
}

static int
launch_timer(unsigned int lcore_id)
{
	int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms;

	RTE_SET_USED(lcore_id);

	if (rte_get_main_lcore() != lcore_id) {
		rte_panic("timer on lcore:%d which is not main core:%d\n",
				lcore_id,
				rte_get_main_lcore());
	}

	RTE_LOG(INFO, POWER, "Bring up the Timer\n");

	if (app_mode == APP_MODE_EMPTY_POLL)
		empty_poll_setup_timer();
	else
		telemetry_setup_timer();

	cycles_10ms = rte_get_timer_hz() / 100;

	while (!is_done()) {
		cur_tsc = rte_rdtsc();
		diff_tsc = cur_tsc - prev_tsc;
		if (diff_tsc > cycles_10ms) {
			rte_timer_manage();
			prev_tsc = cur_tsc;
			cycles_10ms = rte_get_timer_hz() / 100;
		}
	}

	RTE_LOG(INFO, POWER, "Timer_subsystem is done\n");

	return 0;
}

static int
autodetect_mode(void)
{
	RTE_LOG(NOTICE, L3FWD_POWER, "Operating mode not specified, probing frequency scaling support...\n");

	/*
	 * Empty poll and telemetry modes have to be specifically requested to
	 * be enabled, but we can auto-detect between interrupt mode with or
	 * without frequency scaling. Both ACPI and pstate can be used.
	 */
	if (rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ))
		return APP_MODE_LEGACY;
	if (rte_power_check_env_supported(PM_ENV_PSTATE_CPUFREQ))
		return APP_MODE_LEGACY;

	RTE_LOG(NOTICE, L3FWD_POWER, "Frequency scaling not supported, selecting interrupt-only mode\n");

	return APP_MODE_INTERRUPT;
}

static const char *
mode_to_str(enum appmode mode)
{
	switch (mode) {
	case APP_MODE_LEGACY:
		return "legacy";
	case APP_MODE_EMPTY_POLL:
		return "empty poll";
	case APP_MODE_TELEMETRY:
		return "telemetry";
	case APP_MODE_INTERRUPT:
		return "interrupt-only";
	case APP_MODE_PMD_MGMT:
		return "pmd mgmt";
	default:
		return "invalid";
	}
}

static uint32_t
eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
{
	uint32_t overhead_len;

	if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
		overhead_len = max_rx_pktlen - max_mtu;
	else
		overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;

	return overhead_len;
}

static int
config_port_max_pkt_len(struct rte_eth_conf *conf,
		struct rte_eth_dev_info *dev_info)
{
	uint32_t overhead_len;

	if (max_pkt_len == 0)
		return 0;

	if (max_pkt_len < RTE_ETHER_MIN_LEN || max_pkt_len > MAX_JUMBO_PKT_LEN)
		return -1;

	overhead_len = eth_dev_get_overhead_len(dev_info->max_rx_pktlen,
			dev_info->max_mtu);
	conf->rxmode.mtu = max_pkt_len - overhead_len;

	if (conf->rxmode.mtu > RTE_ETHER_MTU)
		conf->txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;

	return 0;
}
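/*
 * main() below follows the usual DPDK example bring-up sequence: EAL init,
 * application argument parsing, power-library init (legacy and empty-poll
 * modes only), per-port and per-queue configuration, device start, and
 * finally launching the per-lcore loop that matches the selected app_mode.
 */
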
/* Power library initialized in the main routine. 8< */
int
main(int argc, char **argv)
{
	struct lcore_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	int ret;
	uint16_t nb_ports;
	uint16_t queueid;
	unsigned lcore_id;
	uint64_t hz;
	uint32_t n_tx_queue, nb_lcores;
	uint32_t dev_rxq_num, dev_txq_num;
	uint8_t nb_rx_queue, queue, socketid;
	uint16_t portid;
	const char *ptr_strings[NUM_TELSTATS];

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
	argc -= ret;
	argv += ret;

	/* catch SIGINT and restore cpufreq governor to ondemand */
	signal(SIGINT, signal_exit_now);

	/* init RTE timer library to be used later */
	rte_timer_subsystem_init();

	/* if we're running pmd-mgmt mode, don't default to baseline mode */
	baseline_enabled = false;

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");

	if (app_mode == APP_MODE_DEFAULT)
		app_mode = autodetect_mode();

	RTE_LOG(INFO, L3FWD_POWER, "Selected operation mode: %s\n",
			mode_to_str(app_mode));

	/* only legacy and empty poll mode rely on power library */
	if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) &&
			init_power_library())
		rte_exit(EXIT_FAILURE, "init_power_library failed\n");

	if (update_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "update_lcore_params failed\n");

	if (check_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");

	ret = init_lcore_rx_queues();
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");

	nb_ports = rte_eth_dev_count_avail();

	if (check_port_config() < 0)
		rte_exit(EXIT_FAILURE, "check_port_config failed\n");

	nb_lcores = rte_lcore_count();

	/* initialize all ports */
	RTE_ETH_FOREACH_DEV(portid) {
		struct rte_eth_conf local_port_conf = port_conf;
		/* not all app modes need interrupts */
		bool need_intr = app_mode == APP_MODE_LEGACY ||
				app_mode == APP_MODE_INTERRUPT;

		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}

		/* init port */
		printf("Initializing port %d ... ", portid);
		fflush(stdout);

		ret = rte_eth_dev_info_get(portid, &dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				"Error during getting device (port %u) info: %s\n",
				portid, strerror(-ret));

		dev_rxq_num = dev_info.max_rx_queues;
		dev_txq_num = dev_info.max_tx_queues;

		nb_rx_queue = get_port_n_rx_queues(portid);
		if (nb_rx_queue > dev_rxq_num)
			rte_exit(EXIT_FAILURE,
				"Cannot configure non-existent rxq: "
				"port=%d\n", portid);

		n_tx_queue = nb_lcores;
		if (n_tx_queue > dev_txq_num)
			n_tx_queue = dev_txq_num;
		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
			nb_rx_queue, (unsigned)n_tx_queue);
		/* If the number of Rx queues is 0, no need to enable Rx interrupt */
		if (nb_rx_queue == 0)
			need_intr = false;

		if (need_intr)
			local_port_conf.intr_conf.rxq = 1;

		ret = rte_eth_dev_info_get(portid, &dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				"Error during getting device (port %u) info: %s\n",
				portid, strerror(-ret));

		ret = config_port_max_pkt_len(&local_port_conf, &dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				"Invalid max packet length: %u (port %u)\n",
				max_pkt_len, portid);

		if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
			local_port_conf.txmode.offloads |=
				RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;

		local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
			dev_info.flow_type_rss_offloads;
		if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
				port_conf.rx_adv_conf.rss_conf.rss_hf) {
			printf("Port %u modified RSS hash function based on hardware support, "
				"requested:%#"PRIx64" configured:%#"PRIx64"\n",
				portid,
				port_conf.rx_adv_conf.rss_conf.rss_hf,
				local_port_conf.rx_adv_conf.rss_conf.rss_hf);
		}

		if (local_port_conf.rx_adv_conf.rss_conf.rss_hf == 0)
			local_port_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
		local_port_conf.rxmode.offloads &= dev_info.rx_offload_capa;
		port_conf.rxmode.offloads = local_port_conf.rxmode.offloads;

		ret = rte_eth_dev_configure(portid, nb_rx_queue,
					(uint16_t)n_tx_queue, &local_port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: "
					"err=%d, port=%d\n", ret, portid);

		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						       &nb_txd);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				"Cannot adjust number of descriptors: err=%d, port=%d\n",
				ret, portid);

		ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				"Cannot get MAC address: err=%d, port=%d\n",
				ret, portid);

		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf(", ");

		/* init memory */
		ret = init_mem(NB_MBUF);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "init_mem failed\n");

		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			/* Initialize TX buffers */
			qconf = &lcore_conf[lcore_id];
			qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
				rte_eth_dev_socket_id(portid));
			if (qconf->tx_buffer[portid] == NULL)
				rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n",
						portid);

			rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST);
		}

		/* init one TX queue per (lcore, port) pair */
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			if (queueid >= dev_txq_num)
				continue;

			if (numa_on)
				socketid =
					(uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
			fflush(stdout);

			txconf = &dev_info.default_txconf;
			txconf->offloads = local_port_conf.txmode.offloads;
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
						     socketid, txconf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_tx_queue_setup: err=%d, "
					"port=%d\n", ret, portid);

			qconf = &lcore_conf[lcore_id];
			qconf->tx_queue_id[portid] = queueid;
			queueid++;

			qconf->tx_port_id[qconf->n_tx_port] = portid;
			qconf->n_tx_port++;
		}
		printf("\n");
	}

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		if (app_mode == APP_MODE_LEGACY) {
			/* init timer structures for each enabled lcore */
			rte_timer_init(&power_timers[lcore_id]);
			hz = rte_get_timer_hz();
			rte_timer_reset(&power_timers[lcore_id],
					hz/TIMER_NUMBER_PER_SECOND,
					SINGLE, lcore_id,
					power_timer_cb, NULL);
		}
		qconf = &lcore_conf[lcore_id];
		printf("\nInitializing rx queues on lcore %u ... ", lcore_id);
		fflush(stdout);

		/* init RX queues */
		for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
			struct rte_eth_rxconf rxq_conf;

			portid = qconf->rx_queue_list[queue].port_id;
			queueid = qconf->rx_queue_list[queue].queue_id;

			if (numa_on)
				socketid =
					(uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("rxq=%d,%d,%d ", portid, queueid, socketid);
			fflush(stdout);

			ret = rte_eth_dev_info_get(portid, &dev_info);
			if (ret != 0)
				rte_exit(EXIT_FAILURE,
					"Error during getting device (port %u) info: %s\n",
					portid, strerror(-ret));

			rxq_conf = dev_info.default_rxconf;
			rxq_conf.offloads = port_conf.rxmode.offloads;
			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
						socketid, &rxq_conf,
						pktmbuf_pool[socketid]);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_rx_queue_setup: err=%d, "
					"port=%d\n", ret, portid);

			if (parse_ptype) {
				if (add_cb_parse_ptype(portid, queueid) < 0)
					rte_exit(EXIT_FAILURE,
						"Failed to add ptype callback\n");
			}

			if (app_mode == APP_MODE_PMD_MGMT && !baseline_enabled) {
				ret = rte_power_ethdev_pmgmt_queue_enable(
						lcore_id, portid, queueid,
						pmgmt_type);
				if (ret < 0)
					rte_exit(EXIT_FAILURE,
						"rte_power_ethdev_pmgmt_queue_enable: err=%d, port=%d\n",
						ret, portid);
			}
		}
	}
	/* >8 End of power library initialization. */

	printf("\n");
	/* start ports */
	RTE_ETH_FOREACH_DEV(portid) {
		if ((enabled_port_mask & (1 << portid)) == 0) {
			continue;
		}
		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
						"port=%d\n", ret, portid);
		/*
		 * If enabled, put device in promiscuous mode.
		 * This allows IO forwarding mode to forward packets
		 * to itself through 2 cross-connected ports of the
		 * target machine.
		 */
		if (promiscuous_on) {
			ret = rte_eth_promiscuous_enable(portid);
			if (ret != 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_promiscuous_enable: err=%s, port=%u\n",
					rte_strerror(-ret), portid);
		}
		/* initialize spinlock for each port */
		rte_spinlock_init(&(locks[portid]));

		if (!parse_ptype)
			if (!check_ptype(portid))
				rte_exit(EXIT_FAILURE,
					"PMD cannot provide needed ptypes\n");
	}

	check_all_ports_link_status(enabled_port_mask);

	if (app_mode == APP_MODE_EMPTY_POLL) {

		if (empty_poll_train) {
			policy.state = TRAINING;
		} else {
			policy.state = MED_NORMAL;
			policy.med_base_edpi = ep_med_edpi;
			policy.hgh_base_edpi = ep_hgh_edpi;
		}

		ret = rte_power_empty_poll_stat_init(&ep_params,
				freq_tlb,
				&policy);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "empty poll init failed\n");
	}

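	/*
	 * Each operating mode runs its own per-lcore loop: legacy and
	 * interrupt-only modes include the main lcore (CALL_MAIN), while
	 * empty-poll and telemetry modes leave it free (SKIP_MAIN) so that it
	 * can drive the periodic timer via launch_timer(). PMD power
	 * management mode reuses the telemetry loop.
	 */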
	/* launch per-lcore init on every lcore */
	if (app_mode == APP_MODE_LEGACY) {
		rte_eal_mp_remote_launch(main_legacy_loop, NULL, CALL_MAIN);
	} else if (app_mode == APP_MODE_EMPTY_POLL) {
		empty_poll_stop = false;
		rte_eal_mp_remote_launch(main_empty_poll_loop, NULL,
				SKIP_MAIN);
	} else if (app_mode == APP_MODE_TELEMETRY) {
		unsigned int i;

		/* Init metrics library */
		rte_metrics_init(rte_socket_id());
		/* Register stats with metrics library */
		for (i = 0; i < NUM_TELSTATS; i++)
			ptr_strings[i] = telstats_strings[i].name;

		ret = rte_metrics_reg_names(ptr_strings, NUM_TELSTATS);
		if (ret >= 0)
			telstats_index = ret;
		else
			rte_exit(EXIT_FAILURE, "failed to register metrics names");

		RTE_LCORE_FOREACH_WORKER(lcore_id) {
			rte_spinlock_init(&stats[lcore_id].telemetry_lock);
		}
		rte_timer_init(&telemetry_timer);
		rte_telemetry_register_cmd("/l3fwd-power/stats",
				handle_app_stats,
				"Returns global power stats. Parameters: None");
		rte_eal_mp_remote_launch(main_telemetry_loop, NULL,
						SKIP_MAIN);
	} else if (app_mode == APP_MODE_INTERRUPT) {
		rte_eal_mp_remote_launch(main_intr_loop, NULL, CALL_MAIN);
	} else if (app_mode == APP_MODE_PMD_MGMT) {
		/* reuse telemetry loop for PMD power management mode */
		rte_eal_mp_remote_launch(main_telemetry_loop, NULL, CALL_MAIN);
	}

	if (app_mode == APP_MODE_EMPTY_POLL || app_mode == APP_MODE_TELEMETRY)
		launch_timer(rte_lcore_id());

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	if (app_mode == APP_MODE_PMD_MGMT) {
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;
			qconf = &lcore_conf[lcore_id];
			for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
				portid = qconf->rx_queue_list[queue].port_id;
				queueid = qconf->rx_queue_list[queue].queue_id;

				rte_power_ethdev_pmgmt_queue_disable(lcore_id,
						portid, queueid);
			}
		}
	}

	RTE_ETH_FOREACH_DEV(portid)
	{
		if ((enabled_port_mask & (1 << portid)) == 0)
			continue;

		ret = rte_eth_dev_stop(portid);
		if (ret != 0)
			RTE_LOG(ERR, L3FWD_POWER, "rte_eth_dev_stop: err=%d, port=%u\n",
					ret, portid);

		rte_eth_dev_close(portid);
	}

	if (app_mode == APP_MODE_EMPTY_POLL)
		rte_power_empty_poll_stat_free();

	if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) &&
			deinit_power_library())
		rte_exit(EXIT_FAILURE, "deinit_power_library failed\n");

	if (rte_eal_cleanup() < 0)
		RTE_LOG(ERR, L3FWD_POWER, "EAL cleanup failed\n");

	return 0;
}