1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdint.h> 8 #include <inttypes.h> 9 #include <sys/types.h> 10 #include <string.h> 11 #include <sys/queue.h> 12 #include <stdarg.h> 13 #include <errno.h> 14 #include <getopt.h> 15 #include <unistd.h> 16 #include <signal.h> 17 #include <math.h> 18 19 #include <rte_common.h> 20 #include <rte_byteorder.h> 21 #include <rte_log.h> 22 #include <rte_malloc.h> 23 #include <rte_memory.h> 24 #include <rte_memcpy.h> 25 #include <rte_eal.h> 26 #include <rte_launch.h> 27 #include <rte_cycles.h> 28 #include <rte_prefetch.h> 29 #include <rte_lcore.h> 30 #include <rte_per_lcore.h> 31 #include <rte_branch_prediction.h> 32 #include <rte_interrupts.h> 33 #include <rte_random.h> 34 #include <rte_debug.h> 35 #include <rte_ether.h> 36 #include <rte_ethdev.h> 37 #include <rte_mempool.h> 38 #include <rte_mbuf.h> 39 #include <rte_ip.h> 40 #include <rte_tcp.h> 41 #include <rte_udp.h> 42 #include <rte_string_fns.h> 43 #include <rte_timer.h> 44 #include <rte_power.h> 45 #include <rte_spinlock.h> 46 #include <rte_power_empty_poll.h> 47 #include <rte_metrics.h> 48 #include <rte_telemetry.h> 49 #include <rte_power_pmd_mgmt.h> 50 #include <rte_power_intel_uncore.h> 51 52 #include "perf_core.h" 53 #include "main.h" 54 55 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 56 57 #define MAX_PKT_BURST 32 58 59 #define MIN_ZERO_POLL_COUNT 10 60 61 /* 100 ms interval */ 62 #define TIMER_NUMBER_PER_SECOND 10 63 /* (10ms) */ 64 #define INTERVALS_PER_SECOND 100 65 /* 100000 us */ 66 #define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND) 67 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25 68 69 #define APP_LOOKUP_EXACT_MATCH 0 70 #define APP_LOOKUP_LPM 1 71 #define DO_RFC_1812_CHECKS 72 73 #ifndef APP_LOOKUP_METHOD 74 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 75 #endif 76 77 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 78 #include <rte_hash.h> 79 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 80 #include <rte_lpm.h> 81 #else 82 #error "APP_LOOKUP_METHOD set to incorrect value" 83 #endif 84 85 #ifndef IPv6_BYTES 86 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ 87 "%02x%02x:%02x%02x:%02x%02x:%02x%02x" 88 #define IPv6_BYTES(addr) \ 89 addr[0], addr[1], addr[2], addr[3], \ 90 addr[4], addr[5], addr[6], addr[7], \ 91 addr[8], addr[9], addr[10], addr[11],\ 92 addr[12], addr[13],addr[14], addr[15] 93 #endif 94 95 #define MAX_JUMBO_PKT_LEN 9600 96 97 #define IPV6_ADDR_LEN 16 98 99 #define MEMPOOL_CACHE_SIZE 256 100 101 /* 102 * This expression is used to calculate the number of mbufs needed depending on 103 * user input, taking into account memory for rx and tx hardware rings, cache 104 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 105 * NB_MBUF never goes below a minimum value of 8192. 106 */ 107 108 #define NB_MBUF RTE_MAX ( \ 109 (nb_ports*nb_rx_queue*nb_rxd + \ 110 nb_ports*nb_lcores*MAX_PKT_BURST + \ 111 nb_ports*n_tx_queue*nb_txd + \ 112 nb_lcores*MEMPOOL_CACHE_SIZE), \ 113 (unsigned)8192) 114 115 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 116 117 #define NB_SOCKETS 8 118 119 /* Configure how many packets ahead to prefetch, when reading packets */ 120 #define PREFETCH_OFFSET 3 121 122 /* 123 * Configurable number of RX/TX ring descriptors 124 */ 125 #define RX_DESC_DEFAULT 1024 126 #define TX_DESC_DEFAULT 1024 127 128 /* 129 * These two thresholds were decided on by running the training algorithm on 130 * a 2.5GHz Xeon. 
These defaults can be overridden by supplying non-zero values 131 * for the med_threshold and high_threshold parameters on the command line. 132 */ 133 #define EMPTY_POLL_MED_THRESHOLD 350000UL 134 #define EMPTY_POLL_HGH_THRESHOLD 580000UL 135 136 #define NUM_TELSTATS RTE_DIM(telstats_strings) 137 138 static uint16_t nb_rxd = RX_DESC_DEFAULT; 139 static uint16_t nb_txd = TX_DESC_DEFAULT; 140 141 /* ethernet addresses of ports */ 142 static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 143 144 /* ethernet addresses of ports */ 145 static rte_spinlock_t locks[RTE_MAX_ETHPORTS]; 146 147 /* mask of enabled ports */ 148 static uint32_t enabled_port_mask = 0; 149 /* Ports set in promiscuous mode off by default. */ 150 static int promiscuous_on = 0; 151 /* NUMA is enabled by default. */ 152 static int numa_on = 1; 153 static bool empty_poll_stop; 154 static bool empty_poll_train; 155 volatile bool quit_signal; 156 static struct ep_params *ep_params; 157 static struct ep_policy policy; 158 static long ep_med_edpi, ep_hgh_edpi; 159 /* timer to update telemetry every 500ms */ 160 static struct rte_timer telemetry_timer; 161 162 /* stats index returned by metrics lib */ 163 int telstats_index; 164 165 /* flag to check if uncore option enabled */ 166 int enabled_uncore = -1; 167 168 struct telstats_name { 169 char name[RTE_ETH_XSTATS_NAME_SIZE]; 170 }; 171 172 /* telemetry stats to be reported */ 173 const struct telstats_name telstats_strings[] = { 174 {"empty_poll"}, 175 {"full_poll"}, 176 {"busy_percent"} 177 }; 178 179 /* core busyness in percentage */ 180 enum busy_rate { 181 ZERO = 0, 182 PARTIAL = 50, 183 FULL = 100 184 }; 185 186 enum uncore_choice { 187 UNCORE_MIN = 0, 188 UNCORE_MAX = 1, 189 UNCORE_IDX = 2 190 }; 191 192 /* reference poll count to measure core busyness */ 193 #define DEFAULT_COUNT 10000 194 /* 195 * reference CYCLES to be used to 196 * measure core busyness based on poll count 197 */ 198 #define MIN_CYCLES 1500000ULL 199 #define MAX_CYCLES 22000000ULL 200 201 /* (500ms) */ 202 #define TELEMETRY_INTERVALS_PER_SEC 2 203 204 static int parse_ptype; /**< Parse packet type using rx callback, and */ 205 /**< disabled by default */ 206 207 enum appmode { 208 APP_MODE_DEFAULT = 0, 209 APP_MODE_LEGACY, 210 APP_MODE_EMPTY_POLL, 211 APP_MODE_TELEMETRY, 212 APP_MODE_INTERRUPT, 213 APP_MODE_PMD_MGMT 214 }; 215 216 enum appmode app_mode; 217 218 static enum rte_power_pmd_mgmt_type pmgmt_type; 219 bool baseline_enabled; 220 221 enum freq_scale_hint_t 222 { 223 FREQ_LOWER = -1, 224 FREQ_CURRENT = 0, 225 FREQ_HIGHER = 1, 226 FREQ_HIGHEST = 2 227 }; 228 229 struct lcore_rx_queue { 230 uint16_t port_id; 231 uint8_t queue_id; 232 enum freq_scale_hint_t freq_up_hint; 233 uint32_t zero_rx_packet_count; 234 uint32_t idle_hint; 235 } __rte_cache_aligned; 236 237 #define MAX_RX_QUEUE_PER_LCORE 16 238 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 239 #define MAX_RX_QUEUE_PER_PORT 128 240 241 #define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 242 243 244 struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; 245 static struct lcore_params lcore_params_array_default[] = { 246 {0, 0, 2}, 247 {0, 1, 2}, 248 {0, 2, 2}, 249 {1, 0, 2}, 250 {1, 1, 2}, 251 {1, 2, 2}, 252 {2, 0, 2}, 253 {3, 0, 3}, 254 {3, 1, 3}, 255 }; 256 257 struct lcore_params *lcore_params = lcore_params_array_default; 258 uint16_t nb_lcore_params = RTE_DIM(lcore_params_array_default); 259 260 static struct rte_eth_conf port_conf = { 261 .rxmode = { 262 .mq_mode = RTE_ETH_MQ_RX_RSS, 263 .offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM, 264 }, 
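	/*
	 * Note: RSS over UDP flows spreads packets across the Rx queues listed
	 * in --config; with rss_key left NULL below, the PMD falls back to its
	 * default hash key.
	 */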
265 .rx_adv_conf = { 266 .rss_conf = { 267 .rss_key = NULL, 268 .rss_hf = RTE_ETH_RSS_UDP, 269 }, 270 }, 271 .txmode = { 272 .mq_mode = RTE_ETH_MQ_TX_NONE, 273 } 274 }; 275 276 static uint32_t max_pkt_len; 277 static uint32_t max_empty_polls = 512; 278 static uint32_t pause_duration = 1; 279 static uint32_t scale_freq_min; 280 static uint32_t scale_freq_max; 281 282 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; 283 284 285 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 286 287 #ifdef RTE_ARCH_X86 288 #include <rte_hash_crc.h> 289 #define DEFAULT_HASH_FUNC rte_hash_crc 290 #else 291 #include <rte_jhash.h> 292 #define DEFAULT_HASH_FUNC rte_jhash 293 #endif 294 295 struct ipv4_5tuple { 296 uint32_t ip_dst; 297 uint32_t ip_src; 298 uint16_t port_dst; 299 uint16_t port_src; 300 uint8_t proto; 301 } __rte_packed; 302 303 struct ipv6_5tuple { 304 uint8_t ip_dst[IPV6_ADDR_LEN]; 305 uint8_t ip_src[IPV6_ADDR_LEN]; 306 uint16_t port_dst; 307 uint16_t port_src; 308 uint8_t proto; 309 } __rte_packed; 310 311 struct ipv4_l3fwd_route { 312 struct ipv4_5tuple key; 313 uint8_t if_out; 314 }; 315 316 struct ipv6_l3fwd_route { 317 struct ipv6_5tuple key; 318 uint8_t if_out; 319 }; 320 321 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 322 {{RTE_IPV4(100,10,0,1), RTE_IPV4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, 323 {{RTE_IPV4(100,20,0,2), RTE_IPV4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, 324 {{RTE_IPV4(100,30,0,3), RTE_IPV4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, 325 {{RTE_IPV4(100,40,0,4), RTE_IPV4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, 326 }; 327 328 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 329 { 330 { 331 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 332 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, 333 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 334 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, 335 1, 10, IPPROTO_UDP 336 }, 4 337 }, 338 }; 339 340 typedef struct rte_hash lookup_struct_t; 341 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 342 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 343 344 #define L3FWD_HASH_ENTRIES 1024 345 346 static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 347 static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 348 #endif 349 350 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 351 struct ipv4_l3fwd_route { 352 uint32_t ip; 353 uint8_t depth; 354 uint8_t if_out; 355 }; 356 357 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 358 {RTE_IPV4(1,1,1,0), 24, 0}, 359 {RTE_IPV4(2,1,1,0), 24, 1}, 360 {RTE_IPV4(3,1,1,0), 24, 2}, 361 {RTE_IPV4(4,1,1,0), 24, 3}, 362 {RTE_IPV4(5,1,1,0), 24, 4}, 363 {RTE_IPV4(6,1,1,0), 24, 5}, 364 {RTE_IPV4(7,1,1,0), 24, 6}, 365 {RTE_IPV4(8,1,1,0), 24, 7}, 366 }; 367 368 #define IPV4_L3FWD_LPM_MAX_RULES 1024 369 370 typedef struct rte_lpm lookup_struct_t; 371 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 372 #endif 373 374 struct lcore_conf { 375 uint16_t n_rx_queue; 376 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 377 uint16_t n_tx_port; 378 uint16_t tx_port_id[RTE_MAX_ETHPORTS]; 379 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 380 struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; 381 lookup_struct_t * ipv4_lookup_struct; 382 lookup_struct_t * ipv6_lookup_struct; 383 } __rte_cache_aligned; 384 385 struct lcore_stats { 386 /* total sleep time in ms since last frequency scaling down */ 387 uint32_t sleep_time; 388 /* number of long sleep recently */ 389 uint32_t nb_long_sleep; 390 /* 
freq. scaling up trend */
	uint32_t trend;
	/* total packet processed recently */
	uint64_t nb_rx_processed;
	/* total iterations looped recently */
	uint64_t nb_iteration_looped;
	/*
	 * Represents empty and non empty polls
	 * of rte_eth_rx_burst();
	 * ep_nep[0] holds non empty polls
	 * i.e. 0 < nb_rx <= MAX_BURST
	 * ep_nep[1] holds empty polls.
	 * i.e. nb_rx == 0
	 */
	uint64_t ep_nep[2];
	/*
	 * Represents full and empty+partial
	 * polls of rte_eth_rx_burst();
	 * fp_nfp[0] holds empty+partial polls.
	 * i.e. 0 <= nb_rx < MAX_BURST
	 * fp_nfp[1] holds full polls
	 * i.e. nb_rx == MAX_BURST
	 */
	uint64_t fp_nfp[2];
	enum busy_rate br;
	rte_spinlock_t telemetry_lock;
} __rte_cache_aligned;

static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned;
static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned;
static struct rte_timer power_timers[RTE_MAX_LCORE];

static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
static inline enum freq_scale_hint_t power_freq_scaleup_heuristic(
		unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);

/*
 * These defaults use the highest frequency index (1), a medium index (9)
 * and a typical low frequency index (14). They can be adjusted to use
 * different indexes with the -h, -m and -l command line options.
 */
static uint8_t freq_tlb[] = {14, 9, 1};

static int is_done(void)
{
	return quit_signal;
}

/* exit signal handler */
static void
signal_exit_now(int sigtype)
{
	if (sigtype == SIGINT)
		quit_signal = true;
}

/* Frequency scale down timer callback */
static void
power_timer_cb(__rte_unused struct rte_timer *tim,
		__rte_unused void *arg)
{
	uint64_t hz;
	float sleep_time_ratio;
	unsigned lcore_id = rte_lcore_id();

	/* fraction of the scaling period this lcore spent sleeping */
	sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
			(float)SCALING_PERIOD;
	/**
	 * Scale down the frequency by one step if the lcore slept for a
	 * large share of the period.
	 */
	if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
		if (rte_power_freq_down)
			rte_power_freq_down(lcore_id);
	}
	else if ((unsigned)(stats[lcore_id].nb_rx_processed /
			stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
		/**
		 * Scale down one step if the average number of packets per
		 * iteration is below expectation.
		 */
		if (rte_power_freq_down)
			rte_power_freq_down(lcore_id);
	}

	/**
	 * Re-arm the timer according to the current frequency so that the
	 * timer interval stays relatively fixed.
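	 * The period is hz / TIMER_NUMBER_PER_SECOND ticks, i.e. 100 ms.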
481 */ 482 hz = rte_get_timer_hz(); 483 rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND, 484 SINGLE, lcore_id, power_timer_cb, NULL); 485 486 stats[lcore_id].nb_rx_processed = 0; 487 stats[lcore_id].nb_iteration_looped = 0; 488 489 stats[lcore_id].sleep_time = 0; 490 } 491 492 /* Enqueue a single packet, and send burst if queue is filled */ 493 static inline int 494 send_single_packet(struct rte_mbuf *m, uint16_t port) 495 { 496 uint32_t lcore_id; 497 struct lcore_conf *qconf; 498 499 lcore_id = rte_lcore_id(); 500 qconf = &lcore_conf[lcore_id]; 501 502 rte_eth_tx_buffer(port, qconf->tx_queue_id[port], 503 qconf->tx_buffer[port], m); 504 505 return 0; 506 } 507 508 #ifdef DO_RFC_1812_CHECKS 509 static inline int 510 is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len) 511 { 512 /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ 513 /* 514 * 1. The packet length reported by the Link Layer must be large 515 * enough to hold the minimum length legal IP datagram (20 bytes). 516 */ 517 if (link_len < sizeof(struct rte_ipv4_hdr)) 518 return -1; 519 520 /* 2. The IP checksum must be correct. */ 521 /* if this is not checked in H/W, check it. */ 522 if ((port_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM) == 0) { 523 uint16_t actual_cksum, expected_cksum; 524 actual_cksum = pkt->hdr_checksum; 525 pkt->hdr_checksum = 0; 526 expected_cksum = rte_ipv4_cksum(pkt); 527 if (actual_cksum != expected_cksum) 528 return -2; 529 } 530 531 /* 532 * 3. The IP version number must be 4. If the version number is not 4 533 * then the packet may be another version of IP, such as IPng or 534 * ST-II. 535 */ 536 if (((pkt->version_ihl) >> 4) != 4) 537 return -3; 538 /* 539 * 4. The IP header length field must be large enough to hold the 540 * minimum length legal IP datagram (20 bytes = 5 words). 541 */ 542 if ((pkt->version_ihl & 0xf) < 5) 543 return -4; 544 545 /* 546 * 5. The IP total length field must be large enough to hold the IP 547 * datagram header, whose length is specified in the IP header length 548 * field. 
549 */ 550 if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr)) 551 return -5; 552 553 return 0; 554 } 555 #endif 556 557 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 558 static void 559 print_ipv4_key(struct ipv4_5tuple key) 560 { 561 printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, " 562 "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src, 563 key.port_dst, key.port_src, key.proto); 564 } 565 static void 566 print_ipv6_key(struct ipv6_5tuple key) 567 { 568 printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", " 569 "port dst = %d, port src = %d, proto = %d\n", 570 IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src), 571 key.port_dst, key.port_src, key.proto); 572 } 573 574 static inline uint16_t 575 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 576 lookup_struct_t * ipv4_l3fwd_lookup_struct) 577 { 578 struct ipv4_5tuple key; 579 struct rte_tcp_hdr *tcp; 580 struct rte_udp_hdr *udp; 581 int ret = 0; 582 583 key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); 584 key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); 585 key.proto = ipv4_hdr->next_proto_id; 586 587 switch (ipv4_hdr->next_proto_id) { 588 case IPPROTO_TCP: 589 tcp = (struct rte_tcp_hdr *)((unsigned char *)ipv4_hdr + 590 sizeof(struct rte_ipv4_hdr)); 591 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 592 key.port_src = rte_be_to_cpu_16(tcp->src_port); 593 break; 594 595 case IPPROTO_UDP: 596 udp = (struct rte_udp_hdr *)((unsigned char *)ipv4_hdr + 597 sizeof(struct rte_ipv4_hdr)); 598 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 599 key.port_src = rte_be_to_cpu_16(udp->src_port); 600 break; 601 602 default: 603 key.port_dst = 0; 604 key.port_src = 0; 605 break; 606 } 607 608 /* Find destination port */ 609 ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); 610 return ((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]); 611 } 612 613 static inline uint16_t 614 get_ipv6_dst_port(struct rte_ipv6_hdr *ipv6_hdr, uint16_t portid, 615 lookup_struct_t *ipv6_l3fwd_lookup_struct) 616 { 617 struct ipv6_5tuple key; 618 struct rte_tcp_hdr *tcp; 619 struct rte_udp_hdr *udp; 620 int ret = 0; 621 622 memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); 623 memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); 624 625 key.proto = ipv6_hdr->proto; 626 627 switch (ipv6_hdr->proto) { 628 case IPPROTO_TCP: 629 tcp = (struct rte_tcp_hdr *)((unsigned char *) ipv6_hdr + 630 sizeof(struct rte_ipv6_hdr)); 631 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 632 key.port_src = rte_be_to_cpu_16(tcp->src_port); 633 break; 634 635 case IPPROTO_UDP: 636 udp = (struct rte_udp_hdr *)((unsigned char *) ipv6_hdr + 637 sizeof(struct rte_ipv6_hdr)); 638 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 639 key.port_src = rte_be_to_cpu_16(udp->src_port); 640 break; 641 642 default: 643 key.port_dst = 0; 644 key.port_src = 0; 645 break; 646 } 647 648 /* Find destination port */ 649 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 650 return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); 651 } 652 #endif 653 654 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 655 static inline uint16_t 656 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 657 lookup_struct_t *ipv4_l3fwd_lookup_struct) 658 { 659 uint32_t next_hop; 660 661 return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, 662 rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? 
663 next_hop : portid); 664 } 665 #endif 666 667 static inline void 668 parse_ptype_one(struct rte_mbuf *m) 669 { 670 struct rte_ether_hdr *eth_hdr; 671 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 672 uint16_t ether_type; 673 674 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 675 ether_type = eth_hdr->ether_type; 676 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) 677 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 678 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) 679 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 680 681 m->packet_type = packet_type; 682 } 683 684 static uint16_t 685 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused, 686 struct rte_mbuf *pkts[], uint16_t nb_pkts, 687 uint16_t max_pkts __rte_unused, 688 void *user_param __rte_unused) 689 { 690 unsigned int i; 691 692 for (i = 0; i < nb_pkts; ++i) 693 parse_ptype_one(pkts[i]); 694 695 return nb_pkts; 696 } 697 698 static int 699 add_cb_parse_ptype(uint16_t portid, uint16_t queueid) 700 { 701 printf("Port %d: softly parse packet type info\n", portid); 702 if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL)) 703 return 0; 704 705 printf("Failed to add rx callback: port=%d\n", portid); 706 return -1; 707 } 708 709 static inline void 710 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid, 711 struct lcore_conf *qconf) 712 { 713 struct rte_ether_hdr *eth_hdr; 714 struct rte_ipv4_hdr *ipv4_hdr; 715 void *d_addr_bytes; 716 uint16_t dst_port; 717 718 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 719 720 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 721 /* Handle IPv4 headers.*/ 722 ipv4_hdr = 723 rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 724 sizeof(struct rte_ether_hdr)); 725 726 #ifdef DO_RFC_1812_CHECKS 727 /* Check to make sure the packet is valid (RFC1812) */ 728 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { 729 rte_pktmbuf_free(m); 730 return; 731 } 732 #endif 733 734 dst_port = get_ipv4_dst_port(ipv4_hdr, portid, 735 qconf->ipv4_lookup_struct); 736 if (dst_port >= RTE_MAX_ETHPORTS || 737 (enabled_port_mask & 1 << dst_port) == 0) 738 dst_port = portid; 739 740 /* 02:00:00:00:00:xx */ 741 d_addr_bytes = ð_hdr->dst_addr.addr_bytes[0]; 742 *((uint64_t *)d_addr_bytes) = 743 0x000000000002 + ((uint64_t)dst_port << 40); 744 745 #ifdef DO_RFC_1812_CHECKS 746 /* Update time to live and header checksum */ 747 --(ipv4_hdr->time_to_live); 748 ++(ipv4_hdr->hdr_checksum); 749 #endif 750 751 /* src addr */ 752 rte_ether_addr_copy(&ports_eth_addr[dst_port], 753 ð_hdr->src_addr); 754 755 send_single_packet(m, dst_port); 756 } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { 757 /* Handle IPv6 headers.*/ 758 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 759 struct rte_ipv6_hdr *ipv6_hdr; 760 761 ipv6_hdr = 762 rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 763 sizeof(struct rte_ether_hdr)); 764 765 dst_port = get_ipv6_dst_port(ipv6_hdr, portid, 766 qconf->ipv6_lookup_struct); 767 768 if (dst_port >= RTE_MAX_ETHPORTS || 769 (enabled_port_mask & 1 << dst_port) == 0) 770 dst_port = portid; 771 772 /* 02:00:00:00:00:xx */ 773 d_addr_bytes = ð_hdr->dst_addr.addr_bytes[0]; 774 *((uint64_t *)d_addr_bytes) = 775 0x000000000002 + ((uint64_t)dst_port << 40); 776 777 /* src addr */ 778 rte_ether_addr_copy(&ports_eth_addr[dst_port], 779 ð_hdr->src_addr); 780 781 send_single_packet(m, dst_port); 782 #else 783 /* We don't currently handle IPv6 packets in LPM mode. 
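		 * Only an IPv4 LPM table is built in this configuration, so
		 * such packets are simply dropped.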
		 */
		rte_pktmbuf_free(m);
#endif
	} else
		rte_pktmbuf_free(m);

}

#define MINIMUM_SLEEP_TIME 1
#define SUSPEND_THRESHOLD 300

static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
	/* If the empty-poll count is below SUSPEND_THRESHOLD, sleep 1 us */
	if (zero_rx_packet_count < SUSPEND_THRESHOLD)
		return MINIMUM_SLEEP_TIME;
	/*
	 * Otherwise request a longer sleep of SUSPEND_THRESHOLD us, enough
	 * to cover the wake-up latency from C3/C6 back to C0.
	 */
	else
		return SUSPEND_THRESHOLD;
}

static inline enum freq_scale_hint_t
power_freq_scaleup_heuristic(unsigned lcore_id,
		uint16_t port_id,
		uint16_t queue_id)
{
	uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id);
	/**
	 * An Rx burst reads at most MAX_PKT_BURST (32) entries per iteration,
	 * so the Rx queue occupancy is compared against multiples of the
	 * burst size.
	 */
#define FREQ_GEAR1_RX_PACKET_THRESHOLD MAX_PKT_BURST
#define FREQ_GEAR2_RX_PACKET_THRESHOLD (MAX_PKT_BURST*2)
#define FREQ_GEAR3_RX_PACKET_THRESHOLD (MAX_PKT_BURST*3)
#define FREQ_UP_TREND1_ACC 1
#define FREQ_UP_TREND2_ACC 100
#define FREQ_UP_THRESHOLD 10000

	if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHEST;
	} else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD))
		stats[lcore_id].trend += FREQ_UP_TREND2_ACC;
	else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD))
		stats[lcore_id].trend += FREQ_UP_TREND1_ACC;

	if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHER;
	}

	return FREQ_CURRENT;
}

/**
 * Force the polling thread to sleep until a one-shot Rx interrupt triggers.
 * @param num
 *   Maximum number of Rx interrupt events to wait for (one per Rx queue).
 * @param lcore
 *   Id of the lcore that is going to sleep.
 * @return
 *   0 on success
 */
static int
sleep_until_rx_interrupt(int num, int lcore)
{
	/*
	 * We want to track when we are woken up by traffic so that we can go
	 * back to sleep again without log spamming. Avoid cache line sharing
	 * to prevent threads stepping on each other's toes.
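	 * Each lcore only ever touches its own slot, so no locking is needed.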
	 */
	static struct {
		bool wakeup;
	} __rte_cache_aligned status[RTE_MAX_LCORE];
	struct rte_epoll_event event[num];
	int n, i;
	uint16_t port_id;
	uint8_t queue_id;
	void *data;

	if (status[lcore].wakeup) {
		RTE_LOG(INFO, L3FWD_POWER,
				"lcore %u sleeps until interrupt triggers\n",
				rte_lcore_id());
	}

	n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, 10);
	for (i = 0; i < n; i++) {
		data = event[i].epdata.data;
		port_id = ((uintptr_t)data) >> CHAR_BIT;
		queue_id = ((uintptr_t)data) &
			RTE_LEN2MASK(CHAR_BIT, uint8_t);
		RTE_LOG(INFO, L3FWD_POWER,
			"lcore %u is woken up by rx interrupt on"
			" port %d queue %d\n",
			rte_lcore_id(), port_id, queue_id);
	}
	status[lcore].wakeup = n != 0;

	return 0;
}

static void turn_on_off_intr(struct lcore_conf *qconf, bool on)
{
	int i;
	struct lcore_rx_queue *rx_queue;
	uint8_t queue_id;
	uint16_t port_id;

	for (i = 0; i < qconf->n_rx_queue; ++i) {
		rx_queue = &(qconf->rx_queue_list[i]);
		port_id = rx_queue->port_id;
		queue_id = rx_queue->queue_id;

		rte_spinlock_lock(&(locks[port_id]));
		if (on)
			rte_eth_dev_rx_intr_enable(port_id, queue_id);
		else
			rte_eth_dev_rx_intr_disable(port_id, queue_id);
		rte_spinlock_unlock(&(locks[port_id]));
	}
}

static int event_register(struct lcore_conf *qconf)
{
	struct lcore_rx_queue *rx_queue;
	uint8_t queueid;
	uint16_t portid;
	uint32_t data;
	int ret;
	int i;

	for (i = 0; i < qconf->n_rx_queue; ++i) {
		rx_queue = &(qconf->rx_queue_list[i]);
		portid = rx_queue->port_id;
		queueid = rx_queue->queue_id;
		/* encode port and queue into the epoll user data */
		data = portid << CHAR_BIT | queueid;

		ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
						RTE_EPOLL_PER_THREAD,
						RTE_INTR_EVENT_ADD,
						(void *)((uintptr_t)data));
		if (ret)
			return ret;
	}

	return 0;
}

/* Main processing loop.
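 * Interrupt mode: poll the Rx queues; once every queue has been idle for a
 * while, either busy-wait briefly or arm Rx interrupts and block until
 * traffic resumes.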
 8< */
static int main_intr_loop(__rte_unused void *dummy)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	unsigned int lcore_id;
	uint64_t prev_tsc, diff_tsc, cur_tsc;
	int i, j, nb_rx;
	uint8_t queueid;
	uint16_t portid;
	struct lcore_conf *qconf;
	struct lcore_rx_queue *rx_queue;
	uint32_t lcore_rx_idle_count = 0;
	uint32_t lcore_idle_hint = 0;
	int intr_en = 0;

	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
				   US_PER_S * BURST_TX_DRAIN_US;

	prev_tsc = 0;

	lcore_id = rte_lcore_id();
	qconf = &lcore_conf[lcore_id];

	if (qconf->n_rx_queue == 0) {
		RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n",
				lcore_id);
		return 0;
	}

	RTE_LOG(INFO, L3FWD_POWER, "entering main interrupt loop on lcore %u\n",
			lcore_id);

	for (i = 0; i < qconf->n_rx_queue; i++) {
		portid = qconf->rx_queue_list[i].port_id;
		queueid = qconf->rx_queue_list[i].queue_id;
		RTE_LOG(INFO, L3FWD_POWER,
				" -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
				lcore_id, portid, queueid);
	}

	/* add into event wait list */
	if (event_register(qconf) == 0)
		intr_en = 1;
	else
		RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't be enabled.\n");

	while (!is_done()) {
		stats[lcore_id].nb_iteration_looped++;

		cur_tsc = rte_rdtsc();

		/*
		 * TX burst queue drain
		 */
		diff_tsc = cur_tsc - prev_tsc;
		if (unlikely(diff_tsc > drain_tsc)) {
			for (i = 0; i < qconf->n_tx_port; ++i) {
				portid = qconf->tx_port_id[i];
				rte_eth_tx_buffer_flush(portid,
						qconf->tx_queue_id[portid],
						qconf->tx_buffer[portid]);
			}
			prev_tsc = cur_tsc;
		}

start_rx:
		/*
		 * Read packet from RX queues
		 */
		lcore_rx_idle_count = 0;
		for (i = 0; i < qconf->n_rx_queue; ++i) {
			rx_queue = &(qconf->rx_queue_list[i]);
			rx_queue->idle_hint = 0;
			portid = rx_queue->port_id;
			queueid = rx_queue->queue_id;

			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
					MAX_PKT_BURST);

			stats[lcore_id].nb_rx_processed += nb_rx;
			if (unlikely(nb_rx == 0)) {
				/**
				 * No packet was received from this Rx queue;
				 * try to sleep for a while, forcing the CPU
				 * to enter deeper C-states.
				 */
				rx_queue->zero_rx_packet_count++;

				if (rx_queue->zero_rx_packet_count <=
						MIN_ZERO_POLL_COUNT)
					continue;

				rx_queue->idle_hint = power_idle_heuristic(
						rx_queue->zero_rx_packet_count);
				lcore_rx_idle_count++;
			} else {
				rx_queue->zero_rx_packet_count = 0;
			}

			/* Prefetch first packets */
			for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
				rte_prefetch0(rte_pktmbuf_mtod(
						pkts_burst[j], void *));
			}

			/* Prefetch and forward already prefetched packets */
			for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
				rte_prefetch0(rte_pktmbuf_mtod(
						pkts_burst[j + PREFETCH_OFFSET],
						void *));
				l3fwd_simple_forward(
						pkts_burst[j], portid, qconf);
			}

			/* Forward remaining prefetched packets */
			for (; j < nb_rx; j++) {
				l3fwd_simple_forward(
						pkts_burst[j], portid, qconf);
			}
		}

		if (unlikely(lcore_rx_idle_count == qconf->n_rx_queue)) {
			/**
			 * All Rx queues have been empty in recent consecutive
			 * polls, so sleep in a conservative manner, i.e.
			 * sleep as little as possible.
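			 * The smallest per-queue idle hint decides between a
			 * short busy-wait and arming Rx interrupts below.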
1061 */ 1062 for (i = 1, 1063 lcore_idle_hint = qconf->rx_queue_list[0].idle_hint; 1064 i < qconf->n_rx_queue; ++i) { 1065 rx_queue = &(qconf->rx_queue_list[i]); 1066 if (rx_queue->idle_hint < lcore_idle_hint) 1067 lcore_idle_hint = rx_queue->idle_hint; 1068 } 1069 1070 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1071 /** 1072 * execute "pause" instruction to avoid context 1073 * switch which generally take hundred of 1074 * microseconds for short sleep. 1075 */ 1076 rte_delay_us(lcore_idle_hint); 1077 else { 1078 /* suspend until rx interrupt triggers */ 1079 if (intr_en) { 1080 turn_on_off_intr(qconf, 1); 1081 sleep_until_rx_interrupt( 1082 qconf->n_rx_queue, 1083 lcore_id); 1084 turn_on_off_intr(qconf, 0); 1085 /** 1086 * start receiving packets immediately 1087 */ 1088 if (likely(!is_done())) 1089 goto start_rx; 1090 } 1091 } 1092 stats[lcore_id].sleep_time += lcore_idle_hint; 1093 } 1094 } 1095 1096 return 0; 1097 } 1098 /* >8 End of main processing loop. */ 1099 1100 /* main processing loop */ 1101 static int 1102 main_telemetry_loop(__rte_unused void *dummy) 1103 { 1104 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1105 unsigned int lcore_id; 1106 uint64_t prev_tsc, diff_tsc, cur_tsc, prev_tel_tsc; 1107 int i, j, nb_rx; 1108 uint8_t queueid; 1109 uint16_t portid; 1110 struct lcore_conf *qconf; 1111 struct lcore_rx_queue *rx_queue; 1112 uint64_t ep_nep[2] = {0}, fp_nfp[2] = {0}; 1113 uint64_t poll_count; 1114 enum busy_rate br; 1115 1116 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 1117 US_PER_S * BURST_TX_DRAIN_US; 1118 1119 poll_count = 0; 1120 prev_tsc = 0; 1121 prev_tel_tsc = 0; 1122 1123 lcore_id = rte_lcore_id(); 1124 qconf = &lcore_conf[lcore_id]; 1125 1126 if (qconf->n_rx_queue == 0) { 1127 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1128 lcore_id); 1129 return 0; 1130 } 1131 1132 RTE_LOG(INFO, L3FWD_POWER, "entering main telemetry loop on lcore %u\n", 1133 lcore_id); 1134 1135 for (i = 0; i < qconf->n_rx_queue; i++) { 1136 portid = qconf->rx_queue_list[i].port_id; 1137 queueid = qconf->rx_queue_list[i].queue_id; 1138 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1139 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1140 } 1141 1142 while (!is_done()) { 1143 1144 cur_tsc = rte_rdtsc(); 1145 /* 1146 * TX burst queue drain 1147 */ 1148 diff_tsc = cur_tsc - prev_tsc; 1149 if (unlikely(diff_tsc > drain_tsc)) { 1150 for (i = 0; i < qconf->n_tx_port; ++i) { 1151 portid = qconf->tx_port_id[i]; 1152 rte_eth_tx_buffer_flush(portid, 1153 qconf->tx_queue_id[portid], 1154 qconf->tx_buffer[portid]); 1155 } 1156 prev_tsc = cur_tsc; 1157 } 1158 1159 /* 1160 * Read packet from RX queues 1161 */ 1162 for (i = 0; i < qconf->n_rx_queue; ++i) { 1163 rx_queue = &(qconf->rx_queue_list[i]); 1164 portid = rx_queue->port_id; 1165 queueid = rx_queue->queue_id; 1166 1167 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1168 MAX_PKT_BURST); 1169 ep_nep[nb_rx == 0]++; 1170 fp_nfp[nb_rx == MAX_PKT_BURST]++; 1171 poll_count++; 1172 if (unlikely(nb_rx == 0)) 1173 continue; 1174 1175 /* Prefetch first packets */ 1176 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1177 rte_prefetch0(rte_pktmbuf_mtod( 1178 pkts_burst[j], void *)); 1179 } 1180 1181 /* Prefetch and forward already prefetched packets */ 1182 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1183 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1184 j + PREFETCH_OFFSET], void *)); 1185 l3fwd_simple_forward(pkts_burst[j], portid, 1186 qconf); 1187 } 1188 1189 /* Forward remaining prefetched packets */ 1190 
for (; j < nb_rx; j++) { 1191 l3fwd_simple_forward(pkts_burst[j], portid, 1192 qconf); 1193 } 1194 } 1195 if (unlikely(poll_count >= DEFAULT_COUNT)) { 1196 diff_tsc = cur_tsc - prev_tel_tsc; 1197 if (diff_tsc >= MAX_CYCLES) { 1198 br = FULL; 1199 } else if (diff_tsc > MIN_CYCLES && 1200 diff_tsc < MAX_CYCLES) { 1201 br = (diff_tsc * 100) / MAX_CYCLES; 1202 } else { 1203 br = ZERO; 1204 } 1205 poll_count = 0; 1206 prev_tel_tsc = cur_tsc; 1207 /* update stats for telemetry */ 1208 rte_spinlock_lock(&stats[lcore_id].telemetry_lock); 1209 stats[lcore_id].ep_nep[0] = ep_nep[0]; 1210 stats[lcore_id].ep_nep[1] = ep_nep[1]; 1211 stats[lcore_id].fp_nfp[0] = fp_nfp[0]; 1212 stats[lcore_id].fp_nfp[1] = fp_nfp[1]; 1213 stats[lcore_id].br = br; 1214 rte_spinlock_unlock(&stats[lcore_id].telemetry_lock); 1215 } 1216 } 1217 1218 return 0; 1219 } 1220 /* main processing loop */ 1221 static int 1222 main_empty_poll_loop(__rte_unused void *dummy) 1223 { 1224 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1225 unsigned int lcore_id; 1226 uint64_t prev_tsc, diff_tsc, cur_tsc; 1227 int i, j, nb_rx; 1228 uint8_t queueid; 1229 uint16_t portid; 1230 struct lcore_conf *qconf; 1231 struct lcore_rx_queue *rx_queue; 1232 1233 const uint64_t drain_tsc = 1234 (rte_get_tsc_hz() + US_PER_S - 1) / 1235 US_PER_S * BURST_TX_DRAIN_US; 1236 1237 prev_tsc = 0; 1238 1239 lcore_id = rte_lcore_id(); 1240 qconf = &lcore_conf[lcore_id]; 1241 1242 if (qconf->n_rx_queue == 0) { 1243 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1244 lcore_id); 1245 return 0; 1246 } 1247 1248 for (i = 0; i < qconf->n_rx_queue; i++) { 1249 portid = qconf->rx_queue_list[i].port_id; 1250 queueid = qconf->rx_queue_list[i].queue_id; 1251 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1252 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1253 } 1254 1255 while (!is_done()) { 1256 stats[lcore_id].nb_iteration_looped++; 1257 1258 cur_tsc = rte_rdtsc(); 1259 /* 1260 * TX burst queue drain 1261 */ 1262 diff_tsc = cur_tsc - prev_tsc; 1263 if (unlikely(diff_tsc > drain_tsc)) { 1264 for (i = 0; i < qconf->n_tx_port; ++i) { 1265 portid = qconf->tx_port_id[i]; 1266 rte_eth_tx_buffer_flush(portid, 1267 qconf->tx_queue_id[portid], 1268 qconf->tx_buffer[portid]); 1269 } 1270 prev_tsc = cur_tsc; 1271 } 1272 1273 /* 1274 * Read packet from RX queues 1275 */ 1276 for (i = 0; i < qconf->n_rx_queue; ++i) { 1277 rx_queue = &(qconf->rx_queue_list[i]); 1278 rx_queue->idle_hint = 0; 1279 portid = rx_queue->port_id; 1280 queueid = rx_queue->queue_id; 1281 1282 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1283 MAX_PKT_BURST); 1284 1285 stats[lcore_id].nb_rx_processed += nb_rx; 1286 1287 if (nb_rx == 0) { 1288 1289 rte_power_empty_poll_stat_update(lcore_id); 1290 1291 continue; 1292 } else { 1293 rte_power_poll_stat_update(lcore_id, nb_rx); 1294 } 1295 1296 1297 /* Prefetch first packets */ 1298 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1299 rte_prefetch0(rte_pktmbuf_mtod( 1300 pkts_burst[j], void *)); 1301 } 1302 1303 /* Prefetch and forward already prefetched packets */ 1304 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1305 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1306 j + PREFETCH_OFFSET], 1307 void *)); 1308 l3fwd_simple_forward(pkts_burst[j], portid, 1309 qconf); 1310 } 1311 1312 /* Forward remaining prefetched packets */ 1313 for (; j < nb_rx; j++) { 1314 l3fwd_simple_forward(pkts_burst[j], portid, 1315 qconf); 1316 } 1317 1318 } 1319 1320 } 1321 1322 return 0; 1323 } 1324 /* main processing loop */ 1325 static int 1326 
main_legacy_loop(__rte_unused void *dummy) 1327 { 1328 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1329 unsigned lcore_id; 1330 uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz; 1331 uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power; 1332 int i, j, nb_rx; 1333 uint8_t queueid; 1334 uint16_t portid; 1335 struct lcore_conf *qconf; 1336 struct lcore_rx_queue *rx_queue; 1337 enum freq_scale_hint_t lcore_scaleup_hint; 1338 uint32_t lcore_rx_idle_count = 0; 1339 uint32_t lcore_idle_hint = 0; 1340 int intr_en = 0; 1341 1342 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; 1343 1344 prev_tsc = 0; 1345 hz = rte_get_timer_hz(); 1346 tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND; 1347 1348 lcore_id = rte_lcore_id(); 1349 qconf = &lcore_conf[lcore_id]; 1350 1351 if (qconf->n_rx_queue == 0) { 1352 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); 1353 return 0; 1354 } 1355 1356 RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id); 1357 1358 for (i = 0; i < qconf->n_rx_queue; i++) { 1359 portid = qconf->rx_queue_list[i].port_id; 1360 queueid = qconf->rx_queue_list[i].queue_id; 1361 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1362 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1363 } 1364 1365 /* add into event wait list */ 1366 if (event_register(qconf) == 0) 1367 intr_en = 1; 1368 else 1369 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 1370 1371 while (!is_done()) { 1372 stats[lcore_id].nb_iteration_looped++; 1373 1374 cur_tsc = rte_rdtsc(); 1375 cur_tsc_power = cur_tsc; 1376 1377 /* 1378 * TX burst queue drain 1379 */ 1380 diff_tsc = cur_tsc - prev_tsc; 1381 if (unlikely(diff_tsc > drain_tsc)) { 1382 for (i = 0; i < qconf->n_tx_port; ++i) { 1383 portid = qconf->tx_port_id[i]; 1384 rte_eth_tx_buffer_flush(portid, 1385 qconf->tx_queue_id[portid], 1386 qconf->tx_buffer[portid]); 1387 } 1388 prev_tsc = cur_tsc; 1389 } 1390 1391 diff_tsc_power = cur_tsc_power - prev_tsc_power; 1392 if (diff_tsc_power > tim_res_tsc) { 1393 rte_timer_manage(); 1394 prev_tsc_power = cur_tsc_power; 1395 } 1396 1397 start_rx: 1398 /* 1399 * Read packet from RX queues 1400 */ 1401 lcore_scaleup_hint = FREQ_CURRENT; 1402 lcore_rx_idle_count = 0; 1403 for (i = 0; i < qconf->n_rx_queue; ++i) { 1404 rx_queue = &(qconf->rx_queue_list[i]); 1405 rx_queue->idle_hint = 0; 1406 portid = rx_queue->port_id; 1407 queueid = rx_queue->queue_id; 1408 1409 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1410 MAX_PKT_BURST); 1411 1412 stats[lcore_id].nb_rx_processed += nb_rx; 1413 if (unlikely(nb_rx == 0)) { 1414 /** 1415 * no packet received from rx queue, try to 1416 * sleep for a while forcing CPU enter deeper 1417 * C states. 1418 */ 1419 rx_queue->zero_rx_packet_count++; 1420 1421 if (rx_queue->zero_rx_packet_count <= 1422 MIN_ZERO_POLL_COUNT) 1423 continue; 1424 1425 rx_queue->idle_hint = power_idle_heuristic(\ 1426 rx_queue->zero_rx_packet_count); 1427 lcore_rx_idle_count++; 1428 } else { 1429 rx_queue->zero_rx_packet_count = 0; 1430 1431 /** 1432 * do not scale up frequency immediately as 1433 * user to kernel space communication is costly 1434 * which might impact packet I/O for received 1435 * packets. 
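				 * Instead, record a per-queue scale-up hint
				 * and apply the largest one after the whole
				 * burst has been forwarded.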
				 */
				rx_queue->freq_up_hint =
					power_freq_scaleup_heuristic(lcore_id,
							portid, queueid);
			}

			/* Prefetch first packets */
			for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
				rte_prefetch0(rte_pktmbuf_mtod(
						pkts_burst[j], void *));
			}

			/* Prefetch and forward already prefetched packets */
			for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
				rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
						j + PREFETCH_OFFSET], void *));
				l3fwd_simple_forward(pkts_burst[j], portid,
						qconf);
			}

			/* Forward remaining prefetched packets */
			for (; j < nb_rx; j++) {
				l3fwd_simple_forward(pkts_burst[j], portid,
						qconf);
			}
		}

		if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) {
			for (i = 1, lcore_scaleup_hint =
					qconf->rx_queue_list[0].freq_up_hint;
					i < qconf->n_rx_queue; ++i) {
				rx_queue = &(qconf->rx_queue_list[i]);
				if (rx_queue->freq_up_hint >
						lcore_scaleup_hint)
					lcore_scaleup_hint =
						rx_queue->freq_up_hint;
			}

			if (lcore_scaleup_hint == FREQ_HIGHEST) {
				if (rte_power_freq_max)
					rte_power_freq_max(lcore_id);
			} else if (lcore_scaleup_hint == FREQ_HIGHER) {
				if (rte_power_freq_up)
					rte_power_freq_up(lcore_id);
			}
		} else {
			/**
			 * All Rx queues have been empty in recent consecutive
			 * polls, so sleep in a conservative manner, i.e.
			 * sleep as little as possible.
			 */
			for (i = 1, lcore_idle_hint =
					qconf->rx_queue_list[0].idle_hint;
					i < qconf->n_rx_queue; ++i) {
				rx_queue = &(qconf->rx_queue_list[i]);
				if (rx_queue->idle_hint < lcore_idle_hint)
					lcore_idle_hint = rx_queue->idle_hint;
			}

			if (lcore_idle_hint < SUSPEND_THRESHOLD)
				/**
				 * For a short sleep, execute the "pause"
				 * instruction to avoid a context switch,
				 * which generally takes hundreds of
				 * microseconds.
				 */
				rte_delay_us(lcore_idle_hint);
			else {
				/* suspend until rx interrupt triggers */
				if (intr_en) {
					turn_on_off_intr(qconf, 1);
					sleep_until_rx_interrupt(
							qconf->n_rx_queue,
							lcore_id);
					turn_on_off_intr(qconf, 0);
					/**
					 * start receiving packets immediately
					 */
					if (likely(!is_done()))
						goto start_rx;
				}
			}
			stats[lcore_id].sleep_time += lcore_idle_hint;
		}
	}

	return 0;
}

static int
check_lcore_params(void)
{
	uint8_t queue, lcore;
	uint16_t i;
	int socketid;

	for (i = 0; i < nb_lcore_params; ++i) {
		queue = lcore_params[i].queue_id;
		if (queue >= MAX_RX_QUEUE_PER_PORT) {
			printf("invalid queue number: %hhu\n", queue);
			return -1;
		}
		lcore = lcore_params[i].lcore_id;
		if (!rte_lcore_is_enabled(lcore)) {
			printf("error: lcore %hhu is not enabled in lcore "
				"mask\n", lcore);
			return -1;
		}
		if (((socketid = rte_lcore_to_socket_id(lcore)) != 0) &&
				(numa_on == 0)) {
			printf("warning: lcore %hhu is on socket %d with numa "
				"off\n", lcore, socketid);
		}
		if (app_mode == APP_MODE_TELEMETRY && lcore == rte_lcore_id()) {
			printf("cannot enable main core %d in config for telemetry mode\n",
				rte_lcore_id());
			return -1;
		}
	}
	return 0;
}

static int
check_port_config(void)
{
	unsigned portid;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		portid = lcore_params[i].port_id;
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("port %u is not enabled in port mask\n",
				portid);
			return -1;
		}
		if (!rte_eth_dev_is_valid_port(portid)) {
			printf("port %u is not present on the board\n",
				portid);
			return -1;
		}
	}
	return 0;
}

static uint8_t
get_port_n_rx_queues(const uint16_t port)
{
	int queue = -1;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		if (lcore_params[i].port_id == port &&
				lcore_params[i].queue_id > queue)
			queue = lcore_params[i].queue_id;
	}
	return (uint8_t)(++queue);
}

static int
init_lcore_rx_queues(void)
{
	uint16_t i, nb_rx_queue;
	uint8_t lcore;

	for (i = 0; i < nb_lcore_params; ++i) {
		lcore = lcore_params[i].lcore_id;
		nb_rx_queue = lcore_conf[lcore].n_rx_queue;
		if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
			printf("error: too many queues (%u) for lcore: %u\n",
				(unsigned)nb_rx_queue + 1, (unsigned)lcore);
			return -1;
		} else {
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
				lcore_params[i].port_id;
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
				lcore_params[i].queue_id;
			lcore_conf[lcore].n_rx_queue++;
		}
	}
	return 0;
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK -P"
		" [--config (port,queue,lcore)[,(port,queue,lcore)]]"
		" [--high-perf-cores CORELIST]"
		" [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index)]]"
		" [--max-pkt-len PKTLEN]\n"
		" -p PORTMASK: hexadecimal bitmask of ports to configure\n"
		" -P: enable promiscuous mode\n"
		" -u: set min/max frequency for uncore to minimum value\n"
		" -U: set min/max frequency for uncore to maximum value\n"
		" -i (frequency index): set min/max frequency for uncore to the specified frequency index\n"
		" --config (port,queue,lcore): rx queues configuration\n"
		" --high-perf-cores CORELIST: list of high performance cores\n"
		" --perf-config: similar to --config, but cores are specified as indices"
		" into bins of high or regular performance cores\n"
		" --no-numa: optional, disable numa awareness\n"
		" --max-pkt-len PKTLEN: maximum packet length in decimal (64-9600)\n"
		" --parse-ptype: parse packet type in software\n"
		" --legacy: use legacy interrupt-based scaling\n"
		" --empty-poll: enable empty poll detection,"
		" followed by (training_flag, med_threshold, high_threshold)\n"
		" --telemetry: enable telemetry mode, reporting"
		" empty polls, full polls, and core busyness via telemetry\n"
		" --interrupt-only: enable interrupt-only mode\n"
		" --pmd-mgmt MODE: enable PMD power management mode. "
		"Currently supported modes: baseline, monitor, pause, scale\n"
		" --max-empty-polls MAX_EMPTY_POLLS: number of empty polls to"
		" wait before entering sleep state\n"
		" --pause-duration DURATION: set the duration, in microseconds,"
		" of the pause callback\n"
		" --scale-freq-min FREQ_MIN: set minimum frequency for scaling mode for"
		" all application lcores (FREQ_MIN must be in kHz, in increments of 100MHz)\n"
		" --scale-freq-max FREQ_MAX: set maximum frequency for scaling mode for"
		" all application lcores (FREQ_MAX must be in kHz, in increments of 100MHz)\n",
		prgname);
}

static int
parse_int(const char *opt)
{
	char *end = NULL;
	unsigned long val;

	/* parse integer string */
	val = strtoul(opt, &end, 10);
	if ((opt[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	return val;
}

static int parse_max_pkt_len(const char *pktlen)
{
	char *end = NULL;
	unsigned long len;

	/* parse decimal string */
	len = strtoul(pktlen, &end, 10);
	if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (len == 0)
		return -1;

	return len;
}

static int
parse_uncore_options(enum uncore_choice choice, const char *argument)
{
	unsigned int die, pkg, max_pkg, max_die;
	int ret = 0;

	max_pkg = rte_power_uncore_get_num_pkgs();
	if (max_pkg == 0)
		return -1;

	for (pkg = 0; pkg < max_pkg; pkg++) {
		max_die = rte_power_uncore_get_num_dies(pkg);
		if (max_die == 0)
			return -1;
		for (die = 0; die < max_die; die++) {
			ret = rte_power_uncore_init(pkg, die);
			if (ret == -1) {
				RTE_LOG(INFO, L3FWD_POWER,
					"Unable to initialize uncore for pkg %02u die %02u\n",
					pkg, die);
				return ret;
			}
			if (choice == UNCORE_MIN) {
				ret = rte_power_uncore_freq_min(pkg, die);
				if (ret == -1) {
					RTE_LOG(INFO, L3FWD_POWER,
						"Unable to set the uncore min/max to minimum uncore frequency value for pkg %02u die %02u\n",
						pkg, die);
					return ret;
				}
			} else if (choice == UNCORE_MAX) {
				ret = rte_power_uncore_freq_max(pkg, die);
				if (ret == -1) {
					RTE_LOG(INFO, L3FWD_POWER,
						"Unable to set uncore min/max to maximum uncore frequency value for pkg %02u die %02u\n",
						pkg, die);
					return ret;
				}
			} else if (choice == UNCORE_IDX) {
				char *ptr = NULL;
				int frequency_index = strtol(argument, &ptr, 10);
				if (argument == ptr) {
					RTE_LOG(INFO, L3FWD_POWER,
						"Index given is not a valid number.\n");
					return
-1; 1729 } 1730 int freq_array_len = rte_power_uncore_get_num_freqs(pkg, die); 1731 if (frequency_index > freq_array_len - 1) { 1732 RTE_LOG(INFO, L3FWD_POWER, 1733 "Frequency index given out of range, please choose a value from 0 to %d.\n", 1734 freq_array_len); 1735 return -1; 1736 } 1737 ret = rte_power_set_uncore_freq(pkg, die, frequency_index); 1738 if (ret == -1) { 1739 RTE_LOG(INFO, L3FWD_POWER, 1740 "Unable to set min/max uncore index value for pkg %02u die %02u\n", 1741 pkg, die); 1742 return ret; 1743 } 1744 } else { 1745 RTE_LOG(INFO, L3FWD_POWER, "Uncore choice provided invalid\n"); 1746 return -1; 1747 } 1748 } 1749 } 1750 1751 RTE_LOG(INFO, L3FWD_POWER, "Successfully set max/min/index uncore frequency.\n"); 1752 return ret; 1753 } 1754 1755 static int 1756 parse_portmask(const char *portmask) 1757 { 1758 char *end = NULL; 1759 unsigned long pm; 1760 1761 /* parse hexadecimal string */ 1762 pm = strtoul(portmask, &end, 16); 1763 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 1764 return 0; 1765 1766 return pm; 1767 } 1768 1769 static int 1770 parse_config(const char *q_arg) 1771 { 1772 char s[256]; 1773 const char *p, *p0 = q_arg; 1774 char *end; 1775 enum fieldnames { 1776 FLD_PORT = 0, 1777 FLD_QUEUE, 1778 FLD_LCORE, 1779 _NUM_FLD 1780 }; 1781 unsigned long int_fld[_NUM_FLD]; 1782 char *str_fld[_NUM_FLD]; 1783 int i; 1784 unsigned size; 1785 1786 nb_lcore_params = 0; 1787 1788 while ((p = strchr(p0,'(')) != NULL) { 1789 ++p; 1790 if((p0 = strchr(p,')')) == NULL) 1791 return -1; 1792 1793 size = p0 - p; 1794 if(size >= sizeof(s)) 1795 return -1; 1796 1797 snprintf(s, sizeof(s), "%.*s", size, p); 1798 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != 1799 _NUM_FLD) 1800 return -1; 1801 for (i = 0; i < _NUM_FLD; i++){ 1802 errno = 0; 1803 int_fld[i] = strtoul(str_fld[i], &end, 0); 1804 if (errno != 0 || end == str_fld[i] || int_fld[i] > 1805 255) 1806 return -1; 1807 } 1808 if (nb_lcore_params >= MAX_LCORE_PARAMS) { 1809 printf("exceeded max number of lcore params: %hu\n", 1810 nb_lcore_params); 1811 return -1; 1812 } 1813 lcore_params_array[nb_lcore_params].port_id = 1814 (uint8_t)int_fld[FLD_PORT]; 1815 lcore_params_array[nb_lcore_params].queue_id = 1816 (uint8_t)int_fld[FLD_QUEUE]; 1817 lcore_params_array[nb_lcore_params].lcore_id = 1818 (uint8_t)int_fld[FLD_LCORE]; 1819 ++nb_lcore_params; 1820 } 1821 lcore_params = lcore_params_array; 1822 1823 return 0; 1824 } 1825 1826 static int 1827 parse_pmd_mgmt_config(const char *name) 1828 { 1829 #define PMD_MGMT_MONITOR "monitor" 1830 #define PMD_MGMT_PAUSE "pause" 1831 #define PMD_MGMT_SCALE "scale" 1832 #define PMD_MGMT_BASELINE "baseline" 1833 1834 if (strncmp(PMD_MGMT_MONITOR, name, sizeof(PMD_MGMT_MONITOR)) == 0) { 1835 pmgmt_type = RTE_POWER_MGMT_TYPE_MONITOR; 1836 return 0; 1837 } 1838 1839 if (strncmp(PMD_MGMT_PAUSE, name, sizeof(PMD_MGMT_PAUSE)) == 0) { 1840 pmgmt_type = RTE_POWER_MGMT_TYPE_PAUSE; 1841 return 0; 1842 } 1843 1844 if (strncmp(PMD_MGMT_SCALE, name, sizeof(PMD_MGMT_SCALE)) == 0) { 1845 pmgmt_type = RTE_POWER_MGMT_TYPE_SCALE; 1846 return 0; 1847 } 1848 if (strncmp(PMD_MGMT_BASELINE, name, sizeof(PMD_MGMT_BASELINE)) == 0) { 1849 baseline_enabled = true; 1850 return 0; 1851 } 1852 /* unknown PMD power management mode */ 1853 return -1; 1854 } 1855 1856 static int 1857 parse_ep_config(const char *q_arg) 1858 { 1859 char s[256]; 1860 const char *p = q_arg; 1861 char *end; 1862 int num_arg; 1863 1864 char *str_fld[3]; 1865 1866 int training_flag; 1867 int med_edpi; 1868 int hgh_edpi; 1869 1870 
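	/* Start from the built-in thresholds; non-zero values parsed below override them. */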
ep_med_edpi = EMPTY_POLL_MED_THRESHOLD; 1871 ep_hgh_edpi = EMPTY_POLL_HGH_THRESHOLD; 1872 1873 strlcpy(s, p, sizeof(s)); 1874 1875 num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ','); 1876 1877 empty_poll_train = false; 1878 1879 if (num_arg == 0) 1880 return 0; 1881 1882 if (num_arg == 3) { 1883 1884 training_flag = strtoul(str_fld[0], &end, 0); 1885 med_edpi = strtoul(str_fld[1], &end, 0); 1886 hgh_edpi = strtoul(str_fld[2], &end, 0); 1887 1888 if (training_flag == 1) 1889 empty_poll_train = true; 1890 1891 if (med_edpi > 0) 1892 ep_med_edpi = med_edpi; 1893 1894 if (hgh_edpi > 0) 1895 ep_hgh_edpi = hgh_edpi; 1896 1897 } else { 1898 1899 return -1; 1900 } 1901 1902 return 0; 1903 1904 } 1905 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" 1906 #define CMD_LINE_OPT_LEGACY "legacy" 1907 #define CMD_LINE_OPT_EMPTY_POLL "empty-poll" 1908 #define CMD_LINE_OPT_INTERRUPT_ONLY "interrupt-only" 1909 #define CMD_LINE_OPT_TELEMETRY "telemetry" 1910 #define CMD_LINE_OPT_PMD_MGMT "pmd-mgmt" 1911 #define CMD_LINE_OPT_MAX_PKT_LEN "max-pkt-len" 1912 #define CMD_LINE_OPT_MAX_EMPTY_POLLS "max-empty-polls" 1913 #define CMD_LINE_OPT_PAUSE_DURATION "pause-duration" 1914 #define CMD_LINE_OPT_SCALE_FREQ_MIN "scale-freq-min" 1915 #define CMD_LINE_OPT_SCALE_FREQ_MAX "scale-freq-max" 1916 1917 /* Parse the argument given in the command line of the application */ 1918 static int 1919 parse_args(int argc, char **argv) 1920 { 1921 int opt, ret; 1922 char **argvopt; 1923 int option_index; 1924 uint32_t limit; 1925 char *prgname = argv[0]; 1926 static struct option lgopts[] = { 1927 {"config", 1, 0, 0}, 1928 {"perf-config", 1, 0, 0}, 1929 {"high-perf-cores", 1, 0, 0}, 1930 {"no-numa", 0, 0, 0}, 1931 {CMD_LINE_OPT_MAX_PKT_LEN, 1, 0, 0}, 1932 {CMD_LINE_OPT_EMPTY_POLL, 1, 0, 0}, 1933 {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, 1934 {CMD_LINE_OPT_LEGACY, 0, 0, 0}, 1935 {CMD_LINE_OPT_TELEMETRY, 0, 0, 0}, 1936 {CMD_LINE_OPT_INTERRUPT_ONLY, 0, 0, 0}, 1937 {CMD_LINE_OPT_PMD_MGMT, 1, 0, 0}, 1938 {CMD_LINE_OPT_MAX_EMPTY_POLLS, 1, 0, 0}, 1939 {CMD_LINE_OPT_PAUSE_DURATION, 1, 0, 0}, 1940 {CMD_LINE_OPT_SCALE_FREQ_MIN, 1, 0, 0}, 1941 {CMD_LINE_OPT_SCALE_FREQ_MAX, 1, 0, 0}, 1942 {NULL, 0, 0, 0} 1943 }; 1944 1945 argvopt = argv; 1946 1947 while ((opt = getopt_long(argc, argvopt, "p:l:m:h:PuUi:", 1948 lgopts, &option_index)) != EOF) { 1949 1950 switch (opt) { 1951 /* portmask */ 1952 case 'p': 1953 enabled_port_mask = parse_portmask(optarg); 1954 if (enabled_port_mask == 0) { 1955 printf("invalid portmask\n"); 1956 print_usage(prgname); 1957 return -1; 1958 } 1959 break; 1960 case 'P': 1961 printf("Promiscuous mode selected\n"); 1962 promiscuous_on = 1; 1963 break; 1964 case 'l': 1965 limit = parse_max_pkt_len(optarg); 1966 freq_tlb[LOW] = limit; 1967 break; 1968 case 'm': 1969 limit = parse_max_pkt_len(optarg); 1970 freq_tlb[MED] = limit; 1971 break; 1972 case 'h': 1973 limit = parse_max_pkt_len(optarg); 1974 freq_tlb[HGH] = limit; 1975 break; 1976 case 'u': 1977 enabled_uncore = parse_uncore_options(UNCORE_MIN, NULL); 1978 if (enabled_uncore < 0) { 1979 print_usage(prgname); 1980 return -1; 1981 } 1982 break; 1983 case 'U': 1984 enabled_uncore = parse_uncore_options(UNCORE_MAX, NULL); 1985 if (enabled_uncore < 0) { 1986 print_usage(prgname); 1987 return -1; 1988 } 1989 break; 1990 case 'i': 1991 enabled_uncore = parse_uncore_options(UNCORE_IDX, optarg); 1992 if (enabled_uncore < 0) { 1993 print_usage(prgname); 1994 return -1; 1995 } 1996 break; 1997 /* long options */ 1998 case 0: 1999 if (!strncmp(lgopts[option_index].name, "config", 6)) 
{ 2000 ret = parse_config(optarg); 2001 if (ret) { 2002 printf("invalid config\n"); 2003 print_usage(prgname); 2004 return -1; 2005 } 2006 } 2007 2008 if (!strncmp(lgopts[option_index].name, 2009 "perf-config", 11)) { 2010 ret = parse_perf_config(optarg); 2011 if (ret) { 2012 printf("invalid perf-config\n"); 2013 print_usage(prgname); 2014 return -1; 2015 } 2016 } 2017 2018 if (!strncmp(lgopts[option_index].name, 2019 "high-perf-cores", 15)) { 2020 ret = parse_perf_core_list(optarg); 2021 if (ret) { 2022 printf("invalid high-perf-cores\n"); 2023 print_usage(prgname); 2024 return -1; 2025 } 2026 } 2027 2028 if (!strncmp(lgopts[option_index].name, 2029 "no-numa", 7)) { 2030 printf("numa is disabled \n"); 2031 numa_on = 0; 2032 } 2033 2034 if (!strncmp(lgopts[option_index].name, 2035 CMD_LINE_OPT_LEGACY, 2036 sizeof(CMD_LINE_OPT_LEGACY))) { 2037 if (app_mode != APP_MODE_DEFAULT) { 2038 printf(" legacy mode is mutually exclusive with other modes\n"); 2039 return -1; 2040 } 2041 app_mode = APP_MODE_LEGACY; 2042 printf("legacy mode is enabled\n"); 2043 } 2044 2045 if (!strncmp(lgopts[option_index].name, 2046 CMD_LINE_OPT_EMPTY_POLL, 10)) { 2047 if (app_mode != APP_MODE_DEFAULT) { 2048 printf(" empty-poll mode is mutually exclusive with other modes\n"); 2049 return -1; 2050 } 2051 app_mode = APP_MODE_EMPTY_POLL; 2052 ret = parse_ep_config(optarg); 2053 2054 if (ret) { 2055 printf("invalid empty poll config\n"); 2056 print_usage(prgname); 2057 return -1; 2058 } 2059 printf("empty-poll is enabled\n"); 2060 } 2061 2062 if (!strncmp(lgopts[option_index].name, 2063 CMD_LINE_OPT_TELEMETRY, 2064 sizeof(CMD_LINE_OPT_TELEMETRY))) { 2065 if (app_mode != APP_MODE_DEFAULT) { 2066 printf(" telemetry mode is mutually exclusive with other modes\n"); 2067 return -1; 2068 } 2069 app_mode = APP_MODE_TELEMETRY; 2070 printf("telemetry mode is enabled\n"); 2071 } 2072 2073 if (!strncmp(lgopts[option_index].name, 2074 CMD_LINE_OPT_PMD_MGMT, 2075 sizeof(CMD_LINE_OPT_PMD_MGMT))) { 2076 if (app_mode != APP_MODE_DEFAULT) { 2077 printf(" power mgmt mode is mutually exclusive with other modes\n"); 2078 return -1; 2079 } 2080 if (parse_pmd_mgmt_config(optarg) < 0) { 2081 printf(" Invalid PMD power management mode: %s\n", 2082 optarg); 2083 return -1; 2084 } 2085 app_mode = APP_MODE_PMD_MGMT; 2086 printf("PMD power mgmt mode is enabled\n"); 2087 } 2088 if (!strncmp(lgopts[option_index].name, 2089 CMD_LINE_OPT_INTERRUPT_ONLY, 2090 sizeof(CMD_LINE_OPT_INTERRUPT_ONLY))) { 2091 if (app_mode != APP_MODE_DEFAULT) { 2092 printf(" interrupt-only mode is mutually exclusive with other modes\n"); 2093 return -1; 2094 } 2095 app_mode = APP_MODE_INTERRUPT; 2096 printf("interrupt-only mode is enabled\n"); 2097 } 2098 2099 if (!strncmp(lgopts[option_index].name, 2100 CMD_LINE_OPT_MAX_PKT_LEN, 2101 sizeof(CMD_LINE_OPT_MAX_PKT_LEN))) { 2102 printf("Custom frame size is configured\n"); 2103 max_pkt_len = parse_max_pkt_len(optarg); 2104 } 2105 2106 if (!strncmp(lgopts[option_index].name, 2107 CMD_LINE_OPT_PARSE_PTYPE, 2108 sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { 2109 printf("soft parse-ptype is enabled\n"); 2110 parse_ptype = 1; 2111 } 2112 2113 if (!strncmp(lgopts[option_index].name, 2114 CMD_LINE_OPT_MAX_EMPTY_POLLS, 2115 sizeof(CMD_LINE_OPT_MAX_EMPTY_POLLS))) { 2116 printf("Maximum empty polls configured\n"); 2117 max_empty_polls = parse_int(optarg); 2118 } 2119 2120 if (!strncmp(lgopts[option_index].name, 2121 CMD_LINE_OPT_PAUSE_DURATION, 2122 sizeof(CMD_LINE_OPT_PAUSE_DURATION))) { 2123 printf("Pause duration configured\n"); 2124 
			if (!strncmp(lgopts[option_index].name,
					CMD_LINE_OPT_SCALE_FREQ_MIN,
					sizeof(CMD_LINE_OPT_SCALE_FREQ_MIN))) {
				printf("Scaling frequency minimum configured\n");
				scale_freq_min = parse_int(optarg);
			}

			if (!strncmp(lgopts[option_index].name,
					CMD_LINE_OPT_SCALE_FREQ_MAX,
					sizeof(CMD_LINE_OPT_SCALE_FREQ_MAX))) {
				printf("Scaling frequency maximum configured\n");
				scale_freq_max = parse_int(optarg);
			}

			break;

		default:
			print_usage(prgname);
			return -1;
		}
	}

	if (optind >= 0)
		argv[optind-1] = prgname;

	ret = optind-1;
	optind = 1; /* reset getopt lib */
	return ret;
}

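/*
 * Illustrative invocation (core, port and queue numbers are examples only,
 * and the binary name assumes the default meson example build):
 *
 *   ./dpdk-l3fwd-power -l 1-3 -n 4 -- -p 0x3 -P \
 *           --config="(0,0,2),(1,0,3)" --telemetry
 *
 * EAL arguments appear before the "--" separator and are consumed by
 * rte_eal_init(); everything after it goes through parse_args() above.
 */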
static void
print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr)
{
	char buf[RTE_ETHER_ADDR_FMT_SIZE];
	rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr);
	printf("%s%s", name, buf);
}

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
static void
setup_hash(int socketid)
{
	struct rte_hash_parameters ipv4_l3fwd_hash_params = {
		.name = NULL,
		.entries = L3FWD_HASH_ENTRIES,
		.key_len = sizeof(struct ipv4_5tuple),
		.hash_func = DEFAULT_HASH_FUNC,
		.hash_func_init_val = 0,
	};

	struct rte_hash_parameters ipv6_l3fwd_hash_params = {
		.name = NULL,
		.entries = L3FWD_HASH_ENTRIES,
		.key_len = sizeof(struct ipv6_5tuple),
		.hash_func = DEFAULT_HASH_FUNC,
		.hash_func_init_val = 0,
	};

	unsigned i;
	int ret;
	char s[64];

	/* create ipv4 hash */
	snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
	ipv4_l3fwd_hash_params.name = s;
	ipv4_l3fwd_hash_params.socket_id = socketid;
	ipv4_l3fwd_lookup_struct[socketid] =
		rte_hash_create(&ipv4_l3fwd_hash_params);
	if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
				"socket %d\n", socketid);

	/* create ipv6 hash */
	snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
	ipv6_l3fwd_hash_params.name = s;
	ipv6_l3fwd_hash_params.socket_id = socketid;
	ipv6_l3fwd_lookup_struct[socketid] =
		rte_hash_create(&ipv6_l3fwd_hash_params);
	if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
				"socket %d\n", socketid);

	/* populate the ipv4 hash */
	for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) {
		ret = rte_hash_add_key(ipv4_l3fwd_lookup_struct[socketid],
				(void *) &ipv4_l3fwd_route_array[i].key);
		if (ret < 0) {
			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
					"l3fwd hash on socket %d\n", i, socketid);
		}
		ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out;
		printf("Hash: Adding key\n");
		print_ipv4_key(ipv4_l3fwd_route_array[i].key);
	}

	/* populate the ipv6 hash */
	for (i = 0; i < RTE_DIM(ipv6_l3fwd_route_array); i++) {
		ret = rte_hash_add_key(ipv6_l3fwd_lookup_struct[socketid],
				(void *) &ipv6_l3fwd_route_array[i].key);
		if (ret < 0) {
			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
					"l3fwd hash on socket %d\n", i, socketid);
		}
		ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out;
		printf("Hash: Adding key\n");
		print_ipv6_key(ipv6_l3fwd_route_array[i].key);
	}
}
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
static void
setup_lpm(int socketid)
{
	unsigned i;
	int ret;
	char s[64];

	/* create the LPM table */
	struct rte_lpm_config lpm_ipv4_config;

	lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
	lpm_ipv4_config.number_tbl8s = 256;
	lpm_ipv4_config.flags = 0;

	snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
	ipv4_l3fwd_lookup_struct[socketid] =
		rte_lpm_create(s, socketid, &lpm_ipv4_config);
	if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
				" on socket %d\n", socketid);

	/* populate the LPM table */
	for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) {
		ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
			ipv4_l3fwd_route_array[i].ip,
			ipv4_l3fwd_route_array[i].depth,
			ipv4_l3fwd_route_array[i].if_out);

		if (ret < 0) {
			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
				"l3fwd LPM table on socket %d\n",
				i, socketid);
		}

		printf("LPM: Adding route 0x%08x / %d (%d)\n",
			(unsigned)ipv4_l3fwd_route_array[i].ip,
			ipv4_l3fwd_route_array[i].depth,
			ipv4_l3fwd_route_array[i].if_out);
	}
}
#endif

static int
init_mem(unsigned nb_mbuf)
{
	struct lcore_conf *qconf;
	int socketid;
	unsigned lcore_id;
	char s[64];

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		if (numa_on)
			socketid = rte_lcore_to_socket_id(lcore_id);
		else
			socketid = 0;

		if (socketid >= NB_SOCKETS) {
			rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is "
					"out of range %d\n", socketid,
					lcore_id, NB_SOCKETS);
		}
		if (pktmbuf_pool[socketid] == NULL) {
			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
			pktmbuf_pool[socketid] =
				rte_pktmbuf_pool_create(s, nb_mbuf,
					MEMPOOL_CACHE_SIZE, 0,
					RTE_MBUF_DEFAULT_BUF_SIZE,
					socketid);
			if (pktmbuf_pool[socketid] == NULL)
				rte_exit(EXIT_FAILURE,
					"Cannot init mbuf pool on socket %d\n",
					socketid);
			else
				printf("Allocated mbuf pool on socket %d\n",
					socketid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
			setup_lpm(socketid);
#else
			setup_hash(socketid);
#endif
		}
		qconf = &lcore_conf[lcore_id];
		qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
#endif
	}
	return 0;
}

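/*
 * init_mem() is called once per enabled port from main(); the check on
 * pktmbuf_pool[socketid] above makes the per-socket mempool and lookup-table
 * creation happen only on the first call for each socket.
 */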
/* Check the link status of all ports in up to 9s, and print them finally */
static void
check_all_ports_link_status(uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
	uint8_t count, all_ports_up, print_flag = 0;
	uint16_t portid;
	struct rte_eth_link link;
	int ret;
	char link_status_text[RTE_ETH_LINK_MAX_STR_LEN];

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		RTE_ETH_FOREACH_DEV(portid) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			ret = rte_eth_link_get_nowait(portid, &link);
			if (ret < 0) {
				all_ports_up = 0;
				if (print_flag == 1)
					printf("Port %u link get failed: %s\n",
						portid, rte_strerror(-ret));
				continue;
			}
			/* print link status if flag set */
			if (print_flag == 1) {
				rte_eth_link_to_str(link_status_text,
					sizeof(link_status_text), &link);
				printf("Port %d %s\n", portid,
					link_status_text);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == RTE_ETH_LINK_DOWN) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}

static int check_ptype(uint16_t portid)
{
	int i, ret;
	int ptype_l3_ipv4 = 0;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	int ptype_l3_ipv6 = 0;
#endif
	uint32_t ptype_mask = RTE_PTYPE_L3_MASK;

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
	if (ret <= 0)
		return 0;

	uint32_t ptypes[ret];

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
	for (i = 0; i < ret; ++i) {
		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
			ptype_l3_ipv4 = 1;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
			ptype_l3_ipv6 = 1;
#endif
	}

	if (ptype_l3_ipv4 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	if (ptype_l3_ipv6 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
	if (ptype_l3_ipv4)
#else /* APP_LOOKUP_EXACT_MATCH */
	if (ptype_l3_ipv4 && ptype_l3_ipv6)
#endif
		return 1;

	return 0;
}

static int
init_power_library(void)
{
	enum power_management_env env;
	unsigned int lcore_id;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* init power management library */
		ret = rte_power_init(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library initialization failed on core %u\n",
				lcore_id);
			return ret;
		}
		/* we're not supporting the VM channel mode */
		env = rte_power_get_env();
		if (env != PM_ENV_ACPI_CPUFREQ &&
				env != PM_ENV_PSTATE_CPUFREQ) {
			RTE_LOG(ERR, POWER,
				"Only ACPI and PSTATE modes are supported\n");
			return -1;
		}
	}
	return ret;
}

static int
deinit_power_library(void)
{
	unsigned int lcore_id, max_pkg, max_die, die, pkg;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* deinit power management library */
		ret = rte_power_exit(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library deinitialization failed on core %u\n",
				lcore_id);
			return ret;
		}
	}

	/* if uncore option was set */
	if (enabled_uncore == 0) {
		max_pkg = rte_power_uncore_get_num_pkgs();
		if (max_pkg == 0)
			return -1;
		for (pkg = 0; pkg < max_pkg; pkg++) {
			max_die = rte_power_uncore_get_num_dies(pkg);
			if (max_die == 0)
				return -1;
			for (die = 0; die < max_die; die++) {
				ret = rte_power_uncore_exit(pkg, die);
				if (ret < 0) {
					RTE_LOG(ERR, L3FWD_POWER,
						"Failed to exit uncore for pkg %02u die %02u\n",
						pkg, die);
					return -1;
				}
			}
		}
	}
	return ret;
}

static void
get_current_stat_values(uint64_t *values)
{
	unsigned int lcore_id = rte_lcore_id();
	struct lcore_conf *qconf;
	uint64_t app_eps = 0, app_fps = 0, app_br = 0;
	uint64_t count = 0;

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		qconf = &lcore_conf[lcore_id];
		if (qconf->n_rx_queue == 0)
			continue;
		count++;
		rte_spinlock_lock(&stats[lcore_id].telemetry_lock);
		app_eps += stats[lcore_id].ep_nep[1];
		app_fps += stats[lcore_id].fp_nfp[1];
		app_br += stats[lcore_id].br;
		rte_spinlock_unlock(&stats[lcore_id].telemetry_lock);
	}

	if (count > 0) {
		values[0] = app_eps/count;
		values[1] = app_fps/count;
		values[2] = app_br/count;
	} else
		memset(values, 0, sizeof(uint64_t) * NUM_TELSTATS);
}

static void
update_telemetry(__rte_unused struct rte_timer *tim,
		__rte_unused void *arg)
{
	int ret;
	uint64_t values[NUM_TELSTATS] = {0};

	get_current_stat_values(values);
	ret = rte_metrics_update_values(RTE_METRICS_GLOBAL, telstats_index,
					values, RTE_DIM(values));
	if (ret < 0)
		RTE_LOG(WARNING, POWER, "failed to update metrics\n");
}

static int
handle_app_stats(const char *cmd __rte_unused,
		const char *params __rte_unused,
		struct rte_tel_data *d)
{
	uint64_t values[NUM_TELSTATS] = {0};
	uint32_t i;

	rte_tel_data_start_dict(d);
	get_current_stat_values(values);
	for (i = 0; i < NUM_TELSTATS; i++)
		rte_tel_data_add_dict_u64(d, telstats_strings[i].name,
				values[i]);
	return 0;
}

static void
telemetry_setup_timer(void)
{
	int lcore_id = rte_lcore_id();
	uint64_t hz = rte_get_timer_hz();
	uint64_t ticks;

	ticks = hz / TELEMETRY_INTERVALS_PER_SEC;
	rte_timer_reset_sync(&telemetry_timer,
			ticks,
			PERIODICAL,
			lcore_id,
			update_telemetry,
			NULL);
}

static void
empty_poll_setup_timer(void)
{
	int lcore_id = rte_lcore_id();
	uint64_t hz = rte_get_timer_hz();

	struct ep_params *ep_ptr = ep_params;

	ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND;

	rte_timer_reset_sync(&ep_ptr->timer0,
			ep_ptr->interval_ticks,
			PERIODICAL,
			lcore_id,
			rte_empty_poll_detection,
			(void *)ep_ptr);
}

static int
launch_timer(unsigned int lcore_id)
{
	int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms;

	RTE_SET_USED(lcore_id);

	if (rte_get_main_lcore() != lcore_id) {
		rte_panic("timer on lcore:%d which is not main core:%d\n",
				lcore_id,
				rte_get_main_lcore());
	}

	RTE_LOG(INFO, POWER, "Bring up the Timer\n");

	if (app_mode == APP_MODE_EMPTY_POLL)
		empty_poll_setup_timer();
	else
		telemetry_setup_timer();

	cycles_10ms = rte_get_timer_hz() / 100;

	while (!is_done()) {
		cur_tsc = rte_rdtsc();
		diff_tsc = cur_tsc - prev_tsc;
		if (diff_tsc > cycles_10ms) {
			rte_timer_manage();
			prev_tsc = cur_tsc;
			cycles_10ms = rte_get_timer_hz() / 100;
		}
	}

	RTE_LOG(INFO, POWER, "Timer_subsystem is done\n");

	return 0;
}

static int
autodetect_mode(void)
{
	RTE_LOG(NOTICE, L3FWD_POWER, "Operating mode not specified, probing frequency scaling support...\n");

	/*
	 * Empty poll and telemetry modes have to be specifically requested to
	 * be enabled, but we can auto-detect between interrupt mode with or
	 * without frequency scaling. Both ACPI and pstate can be used.
	 */
	if (rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ))
		return APP_MODE_LEGACY;
	if (rte_power_check_env_supported(PM_ENV_PSTATE_CPUFREQ))
		return APP_MODE_LEGACY;

	RTE_LOG(NOTICE, L3FWD_POWER, "Frequency scaling not supported, selecting interrupt-only mode\n");

	return APP_MODE_INTERRUPT;
}

static const char *
mode_to_str(enum appmode mode)
{
	switch (mode) {
	case APP_MODE_LEGACY:
		return "legacy";
	case APP_MODE_EMPTY_POLL:
		return "empty poll";
	case APP_MODE_TELEMETRY:
		return "telemetry";
	case APP_MODE_INTERRUPT:
		return "interrupt-only";
	case APP_MODE_PMD_MGMT:
		return "pmd mgmt";
	default:
		return "invalid";
	}
}

static uint32_t
eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
{
	uint32_t overhead_len;

	if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
		overhead_len = max_rx_pktlen - max_mtu;
	else
		overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;

	return overhead_len;
}

static int
config_port_max_pkt_len(struct rte_eth_conf *conf,
		struct rte_eth_dev_info *dev_info)
{
	uint32_t overhead_len;

	if (max_pkt_len == 0)
		return 0;

	if (max_pkt_len < RTE_ETHER_MIN_LEN || max_pkt_len > MAX_JUMBO_PKT_LEN)
		return -1;

	overhead_len = eth_dev_get_overhead_len(dev_info->max_rx_pktlen,
			dev_info->max_mtu);
	conf->rxmode.mtu = max_pkt_len - overhead_len;

	if (conf->rxmode.mtu > RTE_ETHER_MTU)
		conf->txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;

	return 0;
}

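/*
 * Worked example (illustrative numbers, not tied to any particular NIC):
 * with --max-pkt-len=1518 on a device that does not report a usable max_mtu,
 * eth_dev_get_overhead_len() returns RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN
 * (14 + 4 = 18 bytes), so rxmode.mtu becomes 1518 - 18 = 1500; since that is
 * not above RTE_ETHER_MTU, multi-segment TX is left disabled.
 */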
/* Power library initialized in the main routine. 8< */
int
main(int argc, char **argv)
{
	struct lcore_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	int ret;
	uint16_t nb_ports;
	uint16_t queueid;
	unsigned lcore_id;
	uint64_t hz;
	uint32_t n_tx_queue, nb_lcores;
	uint32_t dev_rxq_num, dev_txq_num;
	uint8_t nb_rx_queue, queue, socketid;
	uint16_t portid;
	const char *ptr_strings[NUM_TELSTATS];

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
	argc -= ret;
	argv += ret;

	/* catch SIGINT and restore cpufreq governor to ondemand */
	signal(SIGINT, signal_exit_now);

	/* init RTE timer library to be used later */
	rte_timer_subsystem_init();

	/* if we're running pmd-mgmt mode, don't default to baseline mode */
	baseline_enabled = false;

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");

	if (app_mode == APP_MODE_DEFAULT)
		app_mode = autodetect_mode();

	RTE_LOG(INFO, L3FWD_POWER, "Selected operation mode: %s\n",
			mode_to_str(app_mode));

	/* only legacy and empty poll mode rely on power library */
	if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) &&
			init_power_library())
		rte_exit(EXIT_FAILURE, "init_power_library failed\n");

	if (update_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "update_lcore_params failed\n");

	if (check_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");

	ret = init_lcore_rx_queues();
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");

	nb_ports = rte_eth_dev_count_avail();

	if (check_port_config() < 0)
		rte_exit(EXIT_FAILURE, "check_port_config failed\n");

	nb_lcores = rte_lcore_count();

	/* initialize all ports */
	RTE_ETH_FOREACH_DEV(portid) {
		struct rte_eth_conf local_port_conf = port_conf;
		/* not all app modes need interrupts */
		bool need_intr = app_mode == APP_MODE_LEGACY ||
				app_mode == APP_MODE_INTERRUPT;

		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}

		/* init port */
		printf("Initializing port %d ... ", portid);
		fflush(stdout);

		ret = rte_eth_dev_info_get(portid, &dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				"Error during getting device (port %u) info: %s\n",
				portid, strerror(-ret));

		dev_rxq_num = dev_info.max_rx_queues;
		dev_txq_num = dev_info.max_tx_queues;

		nb_rx_queue = get_port_n_rx_queues(portid);
		if (nb_rx_queue > dev_rxq_num)
			rte_exit(EXIT_FAILURE,
				"Cannot configure non-existent rxq: "
				"port=%d\n", portid);

		n_tx_queue = nb_lcores;
		if (n_tx_queue > dev_txq_num)
			n_tx_queue = dev_txq_num;
		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
			nb_rx_queue, (unsigned)n_tx_queue);

		/* If the number of Rx queues is 0, no need to enable Rx interrupts */
		if (nb_rx_queue == 0)
			need_intr = false;

		if (need_intr)
			local_port_conf.intr_conf.rxq = 1;

		ret = rte_eth_dev_info_get(portid, &dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				"Error during getting device (port %u) info: %s\n",
				portid, strerror(-ret));

		ret = config_port_max_pkt_len(&local_port_conf, &dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				"Invalid max packet length: %u (port %u)\n",
				max_pkt_len, portid);

		if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
			local_port_conf.txmode.offloads |=
				RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;

		local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
			dev_info.flow_type_rss_offloads;
		if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
				port_conf.rx_adv_conf.rss_conf.rss_hf) {
			printf("Port %u modified RSS hash function based on hardware support, "
				"requested:%#"PRIx64" configured:%#"PRIx64"\n",
				portid,
				port_conf.rx_adv_conf.rss_conf.rss_hf,
				local_port_conf.rx_adv_conf.rss_conf.rss_hf);
		}

		if (local_port_conf.rx_adv_conf.rss_conf.rss_hf == 0)
			local_port_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
		local_port_conf.rxmode.offloads &= dev_info.rx_offload_capa;
		port_conf.rxmode.offloads = local_port_conf.rxmode.offloads;

		ret = rte_eth_dev_configure(portid, nb_rx_queue,
					(uint16_t)n_tx_queue, &local_port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: "
					"err=%d, port=%d\n", ret, portid);

		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						&nb_txd);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				"Cannot adjust number of descriptors: err=%d, port=%d\n",
				ret, portid);

		ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				"Cannot get MAC address: err=%d, port=%d\n",
				ret, portid);

		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf(", ");

		/* init memory */
		ret = init_mem(NB_MBUF);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "init_mem failed\n");

		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			/* Initialize TX buffers */
			qconf = &lcore_conf[lcore_id];
			qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
				rte_eth_dev_socket_id(portid));
			if (qconf->tx_buffer[portid] == NULL)
				rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n",
						portid);

			rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST);
		}

		/* init one TX queue per couple (lcore,port) */
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			if (queueid >= dev_txq_num)
				continue;

			if (numa_on)
				socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
			fflush(stdout);

			txconf = &dev_info.default_txconf;
			txconf->offloads = local_port_conf.txmode.offloads;
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
					socketid, txconf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_tx_queue_setup: err=%d, "
					"port=%d\n", ret, portid);

			qconf = &lcore_conf[lcore_id];
			qconf->tx_queue_id[portid] = queueid;
			queueid++;

			qconf->tx_port_id[qconf->n_tx_port] = portid;
			qconf->n_tx_port++;
		}
		printf("\n");
	}

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		if (app_mode == APP_MODE_LEGACY) {
			/* init timer structures for each enabled lcore */
			rte_timer_init(&power_timers[lcore_id]);
			hz = rte_get_timer_hz();
			rte_timer_reset(&power_timers[lcore_id],
					hz/TIMER_NUMBER_PER_SECOND,
					SINGLE, lcore_id,
					power_timer_cb, NULL);
		}
		qconf = &lcore_conf[lcore_id];
		printf("\nInitializing rx queues on lcore %u ... ", lcore_id);
		fflush(stdout);

		/* init RX queues */
		for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
			struct rte_eth_rxconf rxq_conf;

			portid = qconf->rx_queue_list[queue].port_id;
			queueid = qconf->rx_queue_list[queue].queue_id;

			if (numa_on)
				socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("rxq=%d,%d,%d ", portid, queueid, socketid);
			fflush(stdout);

			ret = rte_eth_dev_info_get(portid, &dev_info);
			if (ret != 0)
				rte_exit(EXIT_FAILURE,
					"Error during getting device (port %u) info: %s\n",
					portid, strerror(-ret));

			rxq_conf = dev_info.default_rxconf;
			rxq_conf.offloads = port_conf.rxmode.offloads;
			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
					socketid, &rxq_conf,
					pktmbuf_pool[socketid]);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_rx_queue_setup: err=%d, "
					"port=%d\n", ret, portid);

			if (parse_ptype) {
				if (add_cb_parse_ptype(portid, queueid) < 0)
					rte_exit(EXIT_FAILURE,
						"Failed to add ptype cb\n");
			}

			if (app_mode == APP_MODE_PMD_MGMT && !baseline_enabled) {
				/* Set power_pmd_mgmt configs passed by user */
				rte_power_pmd_mgmt_set_emptypoll_max(max_empty_polls);
				ret = rte_power_pmd_mgmt_set_pause_duration(pause_duration);
				if (ret < 0)
					rte_exit(EXIT_FAILURE,
						"Error setting pause_duration: err=%d, lcore=%d\n",
						ret, lcore_id);

				ret = rte_power_pmd_mgmt_set_scaling_freq_min(lcore_id,
						scale_freq_min);
				if (ret < 0)
					rte_exit(EXIT_FAILURE,
						"Error setting scaling freq min: err=%d, lcore=%d\n",
						ret, lcore_id);

				ret = rte_power_pmd_mgmt_set_scaling_freq_max(lcore_id,
						scale_freq_max);
				if (ret < 0)
					rte_exit(EXIT_FAILURE,
						"Error setting scaling freq max: err=%d, lcore=%d\n",
						ret, lcore_id);

				ret = rte_power_ethdev_pmgmt_queue_enable(
						lcore_id, portid, queueid,
						pmgmt_type);
				if (ret < 0)
					rte_exit(EXIT_FAILURE,
						"rte_power_ethdev_pmgmt_queue_enable: err=%d, port=%d\n",
						ret, portid);
			}
		}
	}
	/* >8 End of power library initialization. */

	printf("\n");

	/* start ports */
	RTE_ETH_FOREACH_DEV(portid) {
		if ((enabled_port_mask & (1 << portid)) == 0) {
			continue;
		}
		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
					"port=%d\n", ret, portid);
		/*
		 * If enabled, put device in promiscuous mode.
		 * This allows IO forwarding mode to forward packets
		 * to itself through 2 cross-connected ports of the
		 * target machine.
		 */
		if (promiscuous_on) {
			ret = rte_eth_promiscuous_enable(portid);
			if (ret != 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_promiscuous_enable: err=%s, port=%u\n",
					rte_strerror(-ret), portid);
		}
		/* initialize spinlock for each port */
		rte_spinlock_init(&(locks[portid]));

		if (!parse_ptype)
			if (!check_ptype(portid))
				rte_exit(EXIT_FAILURE,
					"PMD cannot provide needed ptypes\n");
	}

	check_all_ports_link_status(enabled_port_mask);

	if (app_mode == APP_MODE_EMPTY_POLL) {

		if (empty_poll_train) {
			policy.state = TRAINING;
		} else {
			policy.state = MED_NORMAL;
			policy.med_base_edpi = ep_med_edpi;
			policy.hgh_base_edpi = ep_hgh_edpi;
		}

		ret = rte_power_empty_poll_stat_init(&ep_params,
				freq_tlb,
				&policy);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "empty poll init failed");
	}

	/* launch per-lcore init on every lcore */
	if (app_mode == APP_MODE_LEGACY) {
		rte_eal_mp_remote_launch(main_legacy_loop, NULL, CALL_MAIN);
	} else if (app_mode == APP_MODE_EMPTY_POLL) {
		empty_poll_stop = false;
		rte_eal_mp_remote_launch(main_empty_poll_loop, NULL,
				SKIP_MAIN);
	} else if (app_mode == APP_MODE_TELEMETRY) {
		unsigned int i;

		/* Init metrics library */
		rte_metrics_init(rte_socket_id());
		/* Register stats with metrics library */
		for (i = 0; i < NUM_TELSTATS; i++)
			ptr_strings[i] = telstats_strings[i].name;

		ret = rte_metrics_reg_names(ptr_strings, NUM_TELSTATS);
		if (ret >= 0)
			telstats_index = ret;
		else
			rte_exit(EXIT_FAILURE, "failed to register metrics names");

		RTE_LCORE_FOREACH_WORKER(lcore_id) {
			rte_spinlock_init(&stats[lcore_id].telemetry_lock);
		}
		rte_timer_init(&telemetry_timer);
		rte_telemetry_register_cmd("/l3fwd-power/stats",
				handle_app_stats,
				"Returns global power stats. Parameters: None");
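		/*
		 * The command registered above can be queried at runtime over
		 * the DPDK telemetry socket, e.g. by entering
		 * "/l3fwd-power/stats" in the dpdk-telemetry.py client
		 * (assuming the standard script shipped in DPDK's usertools).
		 */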
		rte_eal_mp_remote_launch(main_telemetry_loop, NULL,
				SKIP_MAIN);
	} else if (app_mode == APP_MODE_INTERRUPT) {
		rte_eal_mp_remote_launch(main_intr_loop, NULL, CALL_MAIN);
	} else if (app_mode == APP_MODE_PMD_MGMT) {
		/* reuse telemetry loop for PMD power management mode */
		rte_eal_mp_remote_launch(main_telemetry_loop, NULL, CALL_MAIN);
	}

	if (app_mode == APP_MODE_EMPTY_POLL || app_mode == APP_MODE_TELEMETRY)
		launch_timer(rte_lcore_id());

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	if (app_mode == APP_MODE_PMD_MGMT) {
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;
			qconf = &lcore_conf[lcore_id];
			for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
				portid = qconf->rx_queue_list[queue].port_id;
				queueid = qconf->rx_queue_list[queue].queue_id;

				rte_power_ethdev_pmgmt_queue_disable(lcore_id,
						portid, queueid);
			}
		}
	}

	RTE_ETH_FOREACH_DEV(portid)
	{
		if ((enabled_port_mask & (1 << portid)) == 0)
			continue;

		ret = rte_eth_dev_stop(portid);
		if (ret != 0)
			RTE_LOG(ERR, L3FWD_POWER, "rte_eth_dev_stop: err=%d, port=%u\n",
				ret, portid);

		rte_eth_dev_close(portid);
	}

	if (app_mode == APP_MODE_EMPTY_POLL)
		rte_power_empty_poll_stat_free();

	if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) &&
			deinit_power_library())
		rte_exit(EXIT_FAILURE, "deinit_power_library failed\n");

	if (rte_eal_cleanup() < 0)
		RTE_LOG(ERR, L3FWD_POWER, "EAL cleanup failed\n");

	return 0;
}