1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdint.h> 8 #include <inttypes.h> 9 #include <sys/types.h> 10 #include <string.h> 11 #include <sys/queue.h> 12 #include <stdarg.h> 13 #include <errno.h> 14 #include <getopt.h> 15 #include <unistd.h> 16 #include <signal.h> 17 #include <math.h> 18 19 #include <rte_common.h> 20 #include <rte_byteorder.h> 21 #include <rte_log.h> 22 #include <rte_malloc.h> 23 #include <rte_memory.h> 24 #include <rte_memcpy.h> 25 #include <rte_eal.h> 26 #include <rte_launch.h> 27 #include <rte_cycles.h> 28 #include <rte_prefetch.h> 29 #include <rte_lcore.h> 30 #include <rte_per_lcore.h> 31 #include <rte_branch_prediction.h> 32 #include <rte_interrupts.h> 33 #include <rte_random.h> 34 #include <rte_debug.h> 35 #include <rte_ether.h> 36 #include <rte_ethdev.h> 37 #include <rte_mempool.h> 38 #include <rte_mbuf.h> 39 #include <rte_ip.h> 40 #include <rte_tcp.h> 41 #include <rte_udp.h> 42 #include <rte_string_fns.h> 43 #include <rte_timer.h> 44 #include <rte_power.h> 45 #include <rte_spinlock.h> 46 #include <rte_metrics.h> 47 #include <rte_telemetry.h> 48 #include <rte_power_pmd_mgmt.h> 49 #include <rte_power_intel_uncore.h> 50 51 #include "perf_core.h" 52 #include "main.h" 53 54 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 55 56 #define MAX_PKT_BURST 32 57 58 #define MIN_ZERO_POLL_COUNT 10 59 60 /* 100 ms interval */ 61 #define TIMER_NUMBER_PER_SECOND 10 62 /* (10ms) */ 63 #define INTERVALS_PER_SECOND 100 64 /* 100000 us */ 65 #define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND) 66 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25 67 68 #define APP_LOOKUP_EXACT_MATCH 0 69 #define APP_LOOKUP_LPM 1 70 #define DO_RFC_1812_CHECKS 71 72 #ifndef APP_LOOKUP_METHOD 73 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 74 #endif 75 76 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 77 #include <rte_hash.h> 78 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 79 #include <rte_lpm.h> 80 #else 81 #error "APP_LOOKUP_METHOD set to incorrect value" 82 #endif 83 84 #ifndef IPv6_BYTES 85 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ 86 "%02x%02x:%02x%02x:%02x%02x:%02x%02x" 87 #define IPv6_BYTES(addr) \ 88 addr[0], addr[1], addr[2], addr[3], \ 89 addr[4], addr[5], addr[6], addr[7], \ 90 addr[8], addr[9], addr[10], addr[11],\ 91 addr[12], addr[13],addr[14], addr[15] 92 #endif 93 94 #define MAX_JUMBO_PKT_LEN 9600 95 96 #define IPV6_ADDR_LEN 16 97 98 #define MEMPOOL_CACHE_SIZE 256 99 100 /* 101 * This expression is used to calculate the number of mbufs needed depending on 102 * user input, taking into account memory for rx and tx hardware rings, cache 103 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 104 * NB_MBUF never goes below a minimum value of 8192. 
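 *
 * For illustration only (these numbers are assumed, not taken from the
 * sources): with nb_ports = 2, nb_rx_queue = 1, nb_rxd = nb_txd = 1024,
 * n_tx_queue = 1 and nb_lcores = 4, the expression works out to
 *   2*1*1024 + 2*4*32 + 2*1*1024 + 4*256 = 5376,
 * which is below the floor, so NB_MBUF evaluates to 8192.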
105 */ 106 107 #define NB_MBUF RTE_MAX ( \ 108 (nb_ports*nb_rx_queue*nb_rxd + \ 109 nb_ports*nb_lcores*MAX_PKT_BURST + \ 110 nb_ports*n_tx_queue*nb_txd + \ 111 nb_lcores*MEMPOOL_CACHE_SIZE), \ 112 (unsigned)8192) 113 114 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 115 116 #define NB_SOCKETS 8 117 118 /* Configure how many packets ahead to prefetch, when reading packets */ 119 #define PREFETCH_OFFSET 3 120 121 /* 122 * Configurable number of RX/TX ring descriptors 123 */ 124 #define RX_DESC_DEFAULT 1024 125 #define TX_DESC_DEFAULT 1024 126 127 #define NUM_TELSTATS RTE_DIM(telstats_strings) 128 129 static uint16_t nb_rxd = RX_DESC_DEFAULT; 130 static uint16_t nb_txd = TX_DESC_DEFAULT; 131 132 /* ethernet addresses of ports */ 133 static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 134 135 /* ethernet addresses of ports */ 136 static rte_spinlock_t locks[RTE_MAX_ETHPORTS]; 137 138 /* mask of enabled ports */ 139 static uint32_t enabled_port_mask = 0; 140 /* Ports set in promiscuous mode off by default. */ 141 static int promiscuous_on = 0; 142 /* NUMA is enabled by default. */ 143 static int numa_on = 1; 144 volatile bool quit_signal; 145 /* timer to update telemetry every 500ms */ 146 static struct rte_timer telemetry_timer; 147 148 /* stats index returned by metrics lib */ 149 int telstats_index; 150 151 /* flag to check if uncore option enabled */ 152 int enabled_uncore = -1; 153 154 struct telstats_name { 155 char name[RTE_ETH_XSTATS_NAME_SIZE]; 156 }; 157 158 /* telemetry stats to be reported */ 159 const struct telstats_name telstats_strings[] = { 160 {"empty_poll"}, 161 {"full_poll"}, 162 {"busy_percent"} 163 }; 164 165 /* core busyness in percentage */ 166 enum busy_rate { 167 ZERO = 0, 168 PARTIAL = 50, 169 FULL = 100 170 }; 171 172 enum uncore_choice { 173 UNCORE_MIN = 0, 174 UNCORE_MAX = 1, 175 UNCORE_IDX = 2 176 }; 177 178 /* reference poll count to measure core busyness */ 179 #define DEFAULT_COUNT 10000 180 /* 181 * reference CYCLES to be used to 182 * measure core busyness based on poll count 183 */ 184 #define MIN_CYCLES 1500000ULL 185 #define MAX_CYCLES 22000000ULL 186 187 /* (500ms) */ 188 #define TELEMETRY_INTERVALS_PER_SEC 2 189 190 static int parse_ptype; /**< Parse packet type using rx callback, and */ 191 /**< disabled by default */ 192 193 enum appmode { 194 APP_MODE_DEFAULT = 0, 195 APP_MODE_LEGACY, 196 APP_MODE_TELEMETRY, 197 APP_MODE_INTERRUPT, 198 APP_MODE_PMD_MGMT 199 }; 200 201 enum appmode app_mode; 202 203 static enum rte_power_pmd_mgmt_type pmgmt_type; 204 bool baseline_enabled; 205 206 enum freq_scale_hint_t 207 { 208 FREQ_LOWER = -1, 209 FREQ_CURRENT = 0, 210 FREQ_HIGHER = 1, 211 FREQ_HIGHEST = 2 212 }; 213 214 struct lcore_rx_queue { 215 uint16_t port_id; 216 uint8_t queue_id; 217 enum freq_scale_hint_t freq_up_hint; 218 uint32_t zero_rx_packet_count; 219 uint32_t idle_hint; 220 } __rte_cache_aligned; 221 222 #define MAX_RX_QUEUE_PER_LCORE 16 223 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 224 #define MAX_RX_QUEUE_PER_PORT 128 225 226 #define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 227 228 229 struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; 230 static struct lcore_params lcore_params_array_default[] = { 231 {0, 0, 2}, 232 {0, 1, 2}, 233 {0, 2, 2}, 234 {1, 0, 2}, 235 {1, 1, 2}, 236 {1, 2, 2}, 237 {2, 0, 2}, 238 {3, 0, 3}, 239 {3, 1, 3}, 240 }; 241 242 struct lcore_params *lcore_params = lcore_params_array_default; 243 uint16_t nb_lcore_params = RTE_DIM(lcore_params_array_default); 244 245 static struct rte_eth_conf port_conf 
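/*
 * Default port configuration: Rx is set up for RSS over UDP traffic so that
 * flows are spread across the Rx queues assigned in lcore_params, and Rx
 * checksum offload is requested so the RFC1812 software checksum check can be
 * skipped when the hardware provides it. Tx uses no multi-queue scheduling
 * (RTE_ETH_MQ_TX_NONE).
 */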
= { 246 .rxmode = { 247 .mq_mode = RTE_ETH_MQ_RX_RSS, 248 .offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM, 249 }, 250 .rx_adv_conf = { 251 .rss_conf = { 252 .rss_key = NULL, 253 .rss_hf = RTE_ETH_RSS_UDP, 254 }, 255 }, 256 .txmode = { 257 .mq_mode = RTE_ETH_MQ_TX_NONE, 258 } 259 }; 260 261 static uint32_t max_pkt_len; 262 static uint32_t max_empty_polls = 512; 263 static uint32_t pause_duration = 1; 264 static uint32_t scale_freq_min; 265 static uint32_t scale_freq_max; 266 267 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; 268 269 270 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 271 272 #ifdef RTE_ARCH_X86 273 #include <rte_hash_crc.h> 274 #define DEFAULT_HASH_FUNC rte_hash_crc 275 #else 276 #include <rte_jhash.h> 277 #define DEFAULT_HASH_FUNC rte_jhash 278 #endif 279 280 struct ipv4_5tuple { 281 uint32_t ip_dst; 282 uint32_t ip_src; 283 uint16_t port_dst; 284 uint16_t port_src; 285 uint8_t proto; 286 } __rte_packed; 287 288 struct ipv6_5tuple { 289 uint8_t ip_dst[IPV6_ADDR_LEN]; 290 uint8_t ip_src[IPV6_ADDR_LEN]; 291 uint16_t port_dst; 292 uint16_t port_src; 293 uint8_t proto; 294 } __rte_packed; 295 296 struct ipv4_l3fwd_route { 297 struct ipv4_5tuple key; 298 uint8_t if_out; 299 }; 300 301 struct ipv6_l3fwd_route { 302 struct ipv6_5tuple key; 303 uint8_t if_out; 304 }; 305 306 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 307 {{RTE_IPV4(100,10,0,1), RTE_IPV4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, 308 {{RTE_IPV4(100,20,0,2), RTE_IPV4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, 309 {{RTE_IPV4(100,30,0,3), RTE_IPV4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, 310 {{RTE_IPV4(100,40,0,4), RTE_IPV4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, 311 }; 312 313 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 314 { 315 { 316 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 317 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, 318 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 319 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, 320 1, 10, IPPROTO_UDP 321 }, 4 322 }, 323 }; 324 325 typedef struct rte_hash lookup_struct_t; 326 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 327 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 328 329 #define L3FWD_HASH_ENTRIES 1024 330 331 static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 332 static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 333 #endif 334 335 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 336 struct ipv4_l3fwd_route { 337 uint32_t ip; 338 uint8_t depth; 339 uint8_t if_out; 340 }; 341 342 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 343 {RTE_IPV4(1,1,1,0), 24, 0}, 344 {RTE_IPV4(2,1,1,0), 24, 1}, 345 {RTE_IPV4(3,1,1,0), 24, 2}, 346 {RTE_IPV4(4,1,1,0), 24, 3}, 347 {RTE_IPV4(5,1,1,0), 24, 4}, 348 {RTE_IPV4(6,1,1,0), 24, 5}, 349 {RTE_IPV4(7,1,1,0), 24, 6}, 350 {RTE_IPV4(8,1,1,0), 24, 7}, 351 }; 352 353 #define IPV4_L3FWD_LPM_MAX_RULES 1024 354 355 typedef struct rte_lpm lookup_struct_t; 356 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 357 #endif 358 359 struct lcore_conf { 360 uint16_t n_rx_queue; 361 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 362 uint16_t n_tx_port; 363 uint16_t tx_port_id[RTE_MAX_ETHPORTS]; 364 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 365 struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; 366 lookup_struct_t * ipv4_lookup_struct; 367 lookup_struct_t * ipv6_lookup_struct; 368 } __rte_cache_aligned; 369 370 struct lcore_stats { 371 /* total sleep time in ms since last frequency scaling down 
 */
	uint32_t sleep_time;
	/* number of long sleeps recently */
	uint32_t nb_long_sleep;
	/* freq. scaling up trend */
	uint32_t trend;
	/* total packets processed recently */
	uint64_t nb_rx_processed;
	/* total iterations looped recently */
	uint64_t nb_iteration_looped;
	/*
	 * Represents empty and non-empty polls
	 * of rte_eth_rx_burst();
	 * ep_nep[0] holds non-empty polls
	 * i.e. 0 < nb_rx <= MAX_BURST
	 * ep_nep[1] holds empty polls.
	 * i.e. nb_rx == 0
	 */
	uint64_t ep_nep[2];
	/*
	 * Represents full and empty+partial
	 * polls of rte_eth_rx_burst();
	 * fp_nfp[0] holds empty+partial polls.
	 * i.e. 0 <= nb_rx < MAX_BURST
	 * fp_nfp[1] holds full polls
	 * i.e. nb_rx == MAX_BURST
	 */
	uint64_t fp_nfp[2];
	enum busy_rate br;
	rte_spinlock_t telemetry_lock;
} __rte_cache_aligned;

static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned;
static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned;
static struct rte_timer power_timers[RTE_MAX_LCORE];

static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
static inline enum freq_scale_hint_t power_freq_scaleup_heuristic(
		unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);

static int is_done(void)
{
	return quit_signal;
}

/* exit signal handler */
static void
signal_exit_now(int sigtype)
{
	if (sigtype == SIGINT)
		quit_signal = true;
}

/* Frequency scale down timer callback */
static void
power_timer_cb(__rte_unused struct rte_timer *tim,
		__rte_unused void *arg)
{
	uint64_t hz;
	float sleep_time_ratio;
	unsigned lcore_id = rte_lcore_id();

	/* fraction of the last scaling period this lcore spent sleeping */
	sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
			(float)SCALING_PERIOD;
	/*
	 * Scale the frequency down one step if this lcore slept for a large
	 * share of the period.
	 */
	if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
		if (rte_power_freq_down)
			rte_power_freq_down(lcore_id);
	} else if ((unsigned)(stats[lcore_id].nb_rx_processed /
			stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
		/*
		 * Scale down one step if the average number of packets per
		 * iteration is below expectation.
		 */
		if (rte_power_freq_down)
			rte_power_freq_down(lcore_id);
	}

	/*
	 * Re-arm the timer from the current timer frequency so that the
	 * interval stays roughly fixed.
	 */
	hz = rte_get_timer_hz();
	rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND,
			SINGLE, lcore_id, power_timer_cb, NULL);

	stats[lcore_id].nb_rx_processed = 0;
	stats[lcore_id].nb_iteration_looped = 0;
	stats[lcore_id].sleep_time = 0;
}

/* Enqueue a single packet, and send burst if queue is filled */
static inline int
send_single_packet(struct rte_mbuf *m, uint16_t port)
{
	uint32_t lcore_id;
	struct lcore_conf *qconf;

	lcore_id = rte_lcore_id();
	qconf = &lcore_conf[lcore_id];

	rte_eth_tx_buffer(port, qconf->tx_queue_id[port],
			qconf->tx_buffer[port], m);

	return 0;
}

#ifdef DO_RFC_1812_CHECKS
static inline int
is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len)
{
	/* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
	/*
	 * 1.
The packet length reported by the Link Layer must be large 492 * enough to hold the minimum length legal IP datagram (20 bytes). 493 */ 494 if (link_len < sizeof(struct rte_ipv4_hdr)) 495 return -1; 496 497 /* 2. The IP checksum must be correct. */ 498 /* if this is not checked in H/W, check it. */ 499 if ((port_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM) == 0) { 500 uint16_t actual_cksum, expected_cksum; 501 actual_cksum = pkt->hdr_checksum; 502 pkt->hdr_checksum = 0; 503 expected_cksum = rte_ipv4_cksum(pkt); 504 if (actual_cksum != expected_cksum) 505 return -2; 506 } 507 508 /* 509 * 3. The IP version number must be 4. If the version number is not 4 510 * then the packet may be another version of IP, such as IPng or 511 * ST-II. 512 */ 513 if (((pkt->version_ihl) >> 4) != 4) 514 return -3; 515 /* 516 * 4. The IP header length field must be large enough to hold the 517 * minimum length legal IP datagram (20 bytes = 5 words). 518 */ 519 if ((pkt->version_ihl & 0xf) < 5) 520 return -4; 521 522 /* 523 * 5. The IP total length field must be large enough to hold the IP 524 * datagram header, whose length is specified in the IP header length 525 * field. 526 */ 527 if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr)) 528 return -5; 529 530 return 0; 531 } 532 #endif 533 534 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 535 static void 536 print_ipv4_key(struct ipv4_5tuple key) 537 { 538 printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, " 539 "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src, 540 key.port_dst, key.port_src, key.proto); 541 } 542 static void 543 print_ipv6_key(struct ipv6_5tuple key) 544 { 545 printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", " 546 "port dst = %d, port src = %d, proto = %d\n", 547 IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src), 548 key.port_dst, key.port_src, key.proto); 549 } 550 551 static inline uint16_t 552 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 553 lookup_struct_t * ipv4_l3fwd_lookup_struct) 554 { 555 struct ipv4_5tuple key; 556 struct rte_tcp_hdr *tcp; 557 struct rte_udp_hdr *udp; 558 int ret = 0; 559 560 key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); 561 key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); 562 key.proto = ipv4_hdr->next_proto_id; 563 564 switch (ipv4_hdr->next_proto_id) { 565 case IPPROTO_TCP: 566 tcp = (struct rte_tcp_hdr *)((unsigned char *)ipv4_hdr + 567 sizeof(struct rte_ipv4_hdr)); 568 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 569 key.port_src = rte_be_to_cpu_16(tcp->src_port); 570 break; 571 572 case IPPROTO_UDP: 573 udp = (struct rte_udp_hdr *)((unsigned char *)ipv4_hdr + 574 sizeof(struct rte_ipv4_hdr)); 575 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 576 key.port_src = rte_be_to_cpu_16(udp->src_port); 577 break; 578 579 default: 580 key.port_dst = 0; 581 key.port_src = 0; 582 break; 583 } 584 585 /* Find destination port */ 586 ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); 587 return ((ret < 0) ? 
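		/*
		 * Miss (ret < 0): fall back to the ingress port. Hit: ret is
		 * the index returned by rte_hash_add_key() in setup_hash(),
		 * so it selects the out-interface stored in
		 * ipv4_l3fwd_out_if[].
		 */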
portid : ipv4_l3fwd_out_if[ret]); 588 } 589 590 static inline uint16_t 591 get_ipv6_dst_port(struct rte_ipv6_hdr *ipv6_hdr, uint16_t portid, 592 lookup_struct_t *ipv6_l3fwd_lookup_struct) 593 { 594 struct ipv6_5tuple key; 595 struct rte_tcp_hdr *tcp; 596 struct rte_udp_hdr *udp; 597 int ret = 0; 598 599 memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); 600 memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); 601 602 key.proto = ipv6_hdr->proto; 603 604 switch (ipv6_hdr->proto) { 605 case IPPROTO_TCP: 606 tcp = (struct rte_tcp_hdr *)((unsigned char *) ipv6_hdr + 607 sizeof(struct rte_ipv6_hdr)); 608 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 609 key.port_src = rte_be_to_cpu_16(tcp->src_port); 610 break; 611 612 case IPPROTO_UDP: 613 udp = (struct rte_udp_hdr *)((unsigned char *) ipv6_hdr + 614 sizeof(struct rte_ipv6_hdr)); 615 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 616 key.port_src = rte_be_to_cpu_16(udp->src_port); 617 break; 618 619 default: 620 key.port_dst = 0; 621 key.port_src = 0; 622 break; 623 } 624 625 /* Find destination port */ 626 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 627 return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); 628 } 629 #endif 630 631 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 632 static inline uint16_t 633 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 634 lookup_struct_t *ipv4_l3fwd_lookup_struct) 635 { 636 uint32_t next_hop; 637 638 return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, 639 rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? 640 next_hop : portid); 641 } 642 #endif 643 644 static inline void 645 parse_ptype_one(struct rte_mbuf *m) 646 { 647 struct rte_ether_hdr *eth_hdr; 648 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 649 uint16_t ether_type; 650 651 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 652 ether_type = eth_hdr->ether_type; 653 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) 654 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 655 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) 656 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 657 658 m->packet_type = packet_type; 659 } 660 661 static uint16_t 662 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused, 663 struct rte_mbuf *pkts[], uint16_t nb_pkts, 664 uint16_t max_pkts __rte_unused, 665 void *user_param __rte_unused) 666 { 667 unsigned int i; 668 669 for (i = 0; i < nb_pkts; ++i) 670 parse_ptype_one(pkts[i]); 671 672 return nb_pkts; 673 } 674 675 static int 676 add_cb_parse_ptype(uint16_t portid, uint16_t queueid) 677 { 678 printf("Port %d: softly parse packet type info\n", portid); 679 if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL)) 680 return 0; 681 682 printf("Failed to add rx callback: port=%d\n", portid); 683 return -1; 684 } 685 686 static inline void 687 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid, 688 struct lcore_conf *qconf) 689 { 690 struct rte_ether_hdr *eth_hdr; 691 struct rte_ipv4_hdr *ipv4_hdr; 692 void *d_addr_bytes; 693 uint16_t dst_port; 694 695 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 696 697 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 698 /* Handle IPv4 headers.*/ 699 ipv4_hdr = 700 rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 701 sizeof(struct rte_ether_hdr)); 702 703 #ifdef DO_RFC_1812_CHECKS 704 /* Check to make sure the packet is valid (RFC1812) */ 705 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { 706 rte_pktmbuf_free(m); 707 return; 708 } 709 #endif 710 711 dst_port = get_ipv4_dst_port(ipv4_hdr, 
portid,
					qconf->ipv4_lookup_struct);
		if (dst_port >= RTE_MAX_ETHPORTS ||
				(enabled_port_mask & 1 << dst_port) == 0)
			dst_port = portid;

		/* Rewrite destination MAC to 02:00:00:00:00:xx */
		d_addr_bytes = &eth_hdr->dst_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) =
			0x000000000002 + ((uint64_t)dst_port << 40);

#ifdef DO_RFC_1812_CHECKS
		/* Update time to live and header checksum */
		--(ipv4_hdr->time_to_live);
		++(ipv4_hdr->hdr_checksum);
#endif

		/* src addr */
		rte_ether_addr_copy(&ports_eth_addr[dst_port],
				&eth_hdr->src_addr);

		send_single_packet(m, dst_port);
	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
		/* Handle IPv6 headers.*/
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		struct rte_ipv6_hdr *ipv6_hdr;

		ipv6_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
					sizeof(struct rte_ether_hdr));

		dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
					qconf->ipv6_lookup_struct);

		if (dst_port >= RTE_MAX_ETHPORTS ||
				(enabled_port_mask & 1 << dst_port) == 0)
			dst_port = portid;

		/* Rewrite destination MAC to 02:00:00:00:00:xx */
		d_addr_bytes = &eth_hdr->dst_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) =
			0x000000000002 + ((uint64_t)dst_port << 40);

		/* src addr */
		rte_ether_addr_copy(&ports_eth_addr[dst_port],
				&eth_hdr->src_addr);

		send_single_packet(m, dst_port);
#else
		/* We don't currently handle IPv6 packets in LPM mode. */
		rte_pktmbuf_free(m);
#endif
	} else
		rte_pktmbuf_free(m);
}

#define MINIMUM_SLEEP_TIME	1
#define SUSPEND_THRESHOLD	300

static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
	/*
	 * If the queue has been polled empty fewer than SUSPEND_THRESHOLD
	 * times in a row, suggest only the minimum 1 us pause.
	 */
	if (zero_rx_packet_count < SUSPEND_THRESHOLD)
		return MINIMUM_SLEEP_TIME;
	/*
	 * Otherwise return SUSPEND_THRESHOLD: the callers treat a hint this
	 * large as a request to block until an Rx interrupt arrives (when
	 * interrupts are enabled), since waking from C3/C6 back to C0 costs
	 * on the order of 100 us and short busy-wait naps no longer pay off.
	 */
	else
		return SUSPEND_THRESHOLD;
}

static inline enum freq_scale_hint_t
power_freq_scaleup_heuristic(unsigned lcore_id,
			     uint16_t port_id,
			     uint16_t queue_id)
{
	uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id);
	/*
	 * Rx burst reads at most MAX_PKT_BURST (32) descriptors per
	 * iteration, so the backlog thresholds below are expressed as
	 * multiples of that burst size.
	 */
#define FREQ_GEAR1_RX_PACKET_THRESHOLD	MAX_PKT_BURST
#define FREQ_GEAR2_RX_PACKET_THRESHOLD	(MAX_PKT_BURST*2)
#define FREQ_GEAR3_RX_PACKET_THRESHOLD	(MAX_PKT_BURST*3)
#define FREQ_UP_TREND1_ACC	1
#define FREQ_UP_TREND2_ACC	100
#define FREQ_UP_THRESHOLD	10000

	if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHEST;
	} else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD))
		stats[lcore_id].trend += FREQ_UP_TREND2_ACC;
	else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD))
		stats[lcore_id].trend += FREQ_UP_TREND1_ACC;

	if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHER;
	}

	return FREQ_CURRENT;
}

/**
 * Force the polling thread to sleep until a one-shot Rx interrupt triggers.
 * @param num
 *   Number of Rx queues registered for this lcore (maximum events to wait for).
 * @param lcore
 *   Lcore id.
 * @return
 *   0 on success
 */
static int
sleep_until_rx_interrupt(int num, int lcore)
{
	/*
	 * we want to track when we are woken up by traffic so that we can go
	 * back to sleep again without log spamming.
Avoid cache line sharing to
	 * prevent threads stepping on each others' toes.
	 */
	static struct {
		bool wakeup;
	} __rte_cache_aligned status[RTE_MAX_LCORE];
	struct rte_epoll_event event[num];
	int n, i;
	uint16_t port_id;
	uint8_t queue_id;
	void *data;

	if (status[lcore].wakeup) {
		RTE_LOG(INFO, L3FWD_POWER,
				"lcore %u sleeps until interrupt triggers\n",
				rte_lcore_id());
	}

	n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, 10);
	for (i = 0; i < n; i++) {
		data = event[i].epdata.data;
		/* unpack the (port << CHAR_BIT | queue) id set by event_register() */
		port_id = ((uintptr_t)data) >> CHAR_BIT;
		queue_id = ((uintptr_t)data) &
			RTE_LEN2MASK(CHAR_BIT, uint8_t);
		RTE_LOG(INFO, L3FWD_POWER,
			"lcore %u woke up from Rx interrupt on"
			" port %d queue %d\n",
			rte_lcore_id(), port_id, queue_id);
	}
	status[lcore].wakeup = n != 0;

	return 0;
}

static void turn_on_off_intr(struct lcore_conf *qconf, bool on)
{
	int i;
	struct lcore_rx_queue *rx_queue;
	uint8_t queue_id;
	uint16_t port_id;

	for (i = 0; i < qconf->n_rx_queue; ++i) {
		rx_queue = &(qconf->rx_queue_list[i]);
		port_id = rx_queue->port_id;
		queue_id = rx_queue->queue_id;

		rte_spinlock_lock(&(locks[port_id]));
		if (on)
			rte_eth_dev_rx_intr_enable(port_id, queue_id);
		else
			rte_eth_dev_rx_intr_disable(port_id, queue_id);
		rte_spinlock_unlock(&(locks[port_id]));
	}
}

static int event_register(struct lcore_conf *qconf)
{
	struct lcore_rx_queue *rx_queue;
	uint8_t queueid;
	uint16_t portid;
	uint32_t data;
	int ret;
	int i;

	for (i = 0; i < qconf->n_rx_queue; ++i) {
		rx_queue = &(qconf->rx_queue_list[i]);
		portid = rx_queue->port_id;
		queueid = rx_queue->queue_id;
		/* pack port and queue ids into the epoll user data */
		data = portid << CHAR_BIT | queueid;

		ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
						RTE_EPOLL_PER_THREAD,
						RTE_INTR_EVENT_ADD,
						(void *)((uintptr_t)data));
		if (ret)
			return ret;
	}

	return 0;
}

/* Main processing loop.
8< */ 913 static int main_intr_loop(__rte_unused void *dummy) 914 { 915 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 916 unsigned int lcore_id; 917 uint64_t prev_tsc, diff_tsc, cur_tsc; 918 int i, j, nb_rx; 919 uint8_t queueid; 920 uint16_t portid; 921 struct lcore_conf *qconf; 922 struct lcore_rx_queue *rx_queue; 923 uint32_t lcore_rx_idle_count = 0; 924 uint32_t lcore_idle_hint = 0; 925 int intr_en = 0; 926 927 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 928 US_PER_S * BURST_TX_DRAIN_US; 929 930 prev_tsc = 0; 931 932 lcore_id = rte_lcore_id(); 933 qconf = &lcore_conf[lcore_id]; 934 935 if (qconf->n_rx_queue == 0) { 936 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 937 lcore_id); 938 return 0; 939 } 940 941 RTE_LOG(INFO, L3FWD_POWER, "entering main interrupt loop on lcore %u\n", 942 lcore_id); 943 944 for (i = 0; i < qconf->n_rx_queue; i++) { 945 portid = qconf->rx_queue_list[i].port_id; 946 queueid = qconf->rx_queue_list[i].queue_id; 947 RTE_LOG(INFO, L3FWD_POWER, 948 " -- lcoreid=%u portid=%u rxqueueid=%hhu\n", 949 lcore_id, portid, queueid); 950 } 951 952 /* add into event wait list */ 953 if (event_register(qconf) == 0) 954 intr_en = 1; 955 else 956 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 957 958 while (!is_done()) { 959 stats[lcore_id].nb_iteration_looped++; 960 961 cur_tsc = rte_rdtsc(); 962 963 /* 964 * TX burst queue drain 965 */ 966 diff_tsc = cur_tsc - prev_tsc; 967 if (unlikely(diff_tsc > drain_tsc)) { 968 for (i = 0; i < qconf->n_tx_port; ++i) { 969 portid = qconf->tx_port_id[i]; 970 rte_eth_tx_buffer_flush(portid, 971 qconf->tx_queue_id[portid], 972 qconf->tx_buffer[portid]); 973 } 974 prev_tsc = cur_tsc; 975 } 976 977 start_rx: 978 /* 979 * Read packet from RX queues 980 */ 981 lcore_rx_idle_count = 0; 982 for (i = 0; i < qconf->n_rx_queue; ++i) { 983 rx_queue = &(qconf->rx_queue_list[i]); 984 rx_queue->idle_hint = 0; 985 portid = rx_queue->port_id; 986 queueid = rx_queue->queue_id; 987 988 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 989 MAX_PKT_BURST); 990 991 stats[lcore_id].nb_rx_processed += nb_rx; 992 if (unlikely(nb_rx == 0)) { 993 /** 994 * no packet received from rx queue, try to 995 * sleep for a while forcing CPU enter deeper 996 * C states. 997 */ 998 rx_queue->zero_rx_packet_count++; 999 1000 if (rx_queue->zero_rx_packet_count <= 1001 MIN_ZERO_POLL_COUNT) 1002 continue; 1003 1004 rx_queue->idle_hint = power_idle_heuristic( 1005 rx_queue->zero_rx_packet_count); 1006 lcore_rx_idle_count++; 1007 } else { 1008 rx_queue->zero_rx_packet_count = 0; 1009 } 1010 1011 /* Prefetch first packets */ 1012 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1013 rte_prefetch0(rte_pktmbuf_mtod( 1014 pkts_burst[j], void *)); 1015 } 1016 1017 /* Prefetch and forward already prefetched packets */ 1018 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1019 rte_prefetch0(rte_pktmbuf_mtod( 1020 pkts_burst[j + PREFETCH_OFFSET], 1021 void *)); 1022 l3fwd_simple_forward( 1023 pkts_burst[j], portid, qconf); 1024 } 1025 1026 /* Forward remaining prefetched packets */ 1027 for (; j < nb_rx; j++) { 1028 l3fwd_simple_forward( 1029 pkts_burst[j], portid, qconf); 1030 } 1031 } 1032 1033 if (unlikely(lcore_rx_idle_count == qconf->n_rx_queue)) { 1034 /** 1035 * All Rx queues empty in recent consecutive polls, 1036 * sleep in a conservative manner, meaning sleep as 1037 * less as possible. 
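			 *
			 * The per-queue idle hints are reduced to their
			 * minimum below: if any queue still suggests only the
			 * 1 us MINIMUM_SLEEP_TIME, the lcore just busy-waits
			 * that long; only when every queue has been idle past
			 * SUSPEND_THRESHOLD polls does it block waiting for
			 * an Rx interrupt.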
1038 */ 1039 for (i = 1, 1040 lcore_idle_hint = qconf->rx_queue_list[0].idle_hint; 1041 i < qconf->n_rx_queue; ++i) { 1042 rx_queue = &(qconf->rx_queue_list[i]); 1043 if (rx_queue->idle_hint < lcore_idle_hint) 1044 lcore_idle_hint = rx_queue->idle_hint; 1045 } 1046 1047 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1048 /** 1049 * execute "pause" instruction to avoid context 1050 * switch which generally take hundred of 1051 * microseconds for short sleep. 1052 */ 1053 rte_delay_us(lcore_idle_hint); 1054 else { 1055 /* suspend until rx interrupt triggers */ 1056 if (intr_en) { 1057 turn_on_off_intr(qconf, 1); 1058 sleep_until_rx_interrupt( 1059 qconf->n_rx_queue, 1060 lcore_id); 1061 turn_on_off_intr(qconf, 0); 1062 /** 1063 * start receiving packets immediately 1064 */ 1065 if (likely(!is_done())) 1066 goto start_rx; 1067 } 1068 } 1069 stats[lcore_id].sleep_time += lcore_idle_hint; 1070 } 1071 } 1072 1073 return 0; 1074 } 1075 /* >8 End of main processing loop. */ 1076 1077 /* main processing loop */ 1078 static int 1079 main_telemetry_loop(__rte_unused void *dummy) 1080 { 1081 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1082 unsigned int lcore_id; 1083 uint64_t prev_tsc, diff_tsc, cur_tsc, prev_tel_tsc; 1084 int i, j, nb_rx; 1085 uint8_t queueid; 1086 uint16_t portid; 1087 struct lcore_conf *qconf; 1088 struct lcore_rx_queue *rx_queue; 1089 uint64_t ep_nep[2] = {0}, fp_nfp[2] = {0}; 1090 uint64_t poll_count; 1091 enum busy_rate br; 1092 1093 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 1094 US_PER_S * BURST_TX_DRAIN_US; 1095 1096 poll_count = 0; 1097 prev_tsc = 0; 1098 prev_tel_tsc = 0; 1099 1100 lcore_id = rte_lcore_id(); 1101 qconf = &lcore_conf[lcore_id]; 1102 1103 if (qconf->n_rx_queue == 0) { 1104 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1105 lcore_id); 1106 return 0; 1107 } 1108 1109 RTE_LOG(INFO, L3FWD_POWER, "entering main telemetry loop on lcore %u\n", 1110 lcore_id); 1111 1112 for (i = 0; i < qconf->n_rx_queue; i++) { 1113 portid = qconf->rx_queue_list[i].port_id; 1114 queueid = qconf->rx_queue_list[i].queue_id; 1115 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1116 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1117 } 1118 1119 while (!is_done()) { 1120 1121 cur_tsc = rte_rdtsc(); 1122 /* 1123 * TX burst queue drain 1124 */ 1125 diff_tsc = cur_tsc - prev_tsc; 1126 if (unlikely(diff_tsc > drain_tsc)) { 1127 for (i = 0; i < qconf->n_tx_port; ++i) { 1128 portid = qconf->tx_port_id[i]; 1129 rte_eth_tx_buffer_flush(portid, 1130 qconf->tx_queue_id[portid], 1131 qconf->tx_buffer[portid]); 1132 } 1133 prev_tsc = cur_tsc; 1134 } 1135 1136 /* 1137 * Read packet from RX queues 1138 */ 1139 for (i = 0; i < qconf->n_rx_queue; ++i) { 1140 rx_queue = &(qconf->rx_queue_list[i]); 1141 portid = rx_queue->port_id; 1142 queueid = rx_queue->queue_id; 1143 1144 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1145 MAX_PKT_BURST); 1146 ep_nep[nb_rx == 0]++; 1147 fp_nfp[nb_rx == MAX_PKT_BURST]++; 1148 poll_count++; 1149 if (unlikely(nb_rx == 0)) 1150 continue; 1151 1152 /* Prefetch first packets */ 1153 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1154 rte_prefetch0(rte_pktmbuf_mtod( 1155 pkts_burst[j], void *)); 1156 } 1157 1158 /* Prefetch and forward already prefetched packets */ 1159 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1160 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1161 j + PREFETCH_OFFSET], void *)); 1162 l3fwd_simple_forward(pkts_burst[j], portid, 1163 qconf); 1164 } 1165 1166 /* Forward remaining prefetched packets */ 1167 
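			/*
			 * (Same three-stage prefetch pipeline as the other
			 * main loops: the first PREFETCH_OFFSET headers are
			 * prefetched up front, the middle loop prefetches
			 * packet j + PREFETCH_OFFSET while forwarding packet
			 * j, and the tail loop below forwards the last
			 * packets whose headers are already prefetched.)
			 */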
for (; j < nb_rx; j++) { 1168 l3fwd_simple_forward(pkts_burst[j], portid, 1169 qconf); 1170 } 1171 } 1172 if (unlikely(poll_count >= DEFAULT_COUNT)) { 1173 diff_tsc = cur_tsc - prev_tel_tsc; 1174 if (diff_tsc >= MAX_CYCLES) { 1175 br = FULL; 1176 } else if (diff_tsc > MIN_CYCLES && 1177 diff_tsc < MAX_CYCLES) { 1178 br = (diff_tsc * 100) / MAX_CYCLES; 1179 } else { 1180 br = ZERO; 1181 } 1182 poll_count = 0; 1183 prev_tel_tsc = cur_tsc; 1184 /* update stats for telemetry */ 1185 rte_spinlock_lock(&stats[lcore_id].telemetry_lock); 1186 stats[lcore_id].ep_nep[0] = ep_nep[0]; 1187 stats[lcore_id].ep_nep[1] = ep_nep[1]; 1188 stats[lcore_id].fp_nfp[0] = fp_nfp[0]; 1189 stats[lcore_id].fp_nfp[1] = fp_nfp[1]; 1190 stats[lcore_id].br = br; 1191 rte_spinlock_unlock(&stats[lcore_id].telemetry_lock); 1192 } 1193 } 1194 1195 return 0; 1196 } 1197 1198 /* main processing loop */ 1199 static int 1200 main_legacy_loop(__rte_unused void *dummy) 1201 { 1202 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1203 unsigned lcore_id; 1204 uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz; 1205 uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power; 1206 int i, j, nb_rx; 1207 uint8_t queueid; 1208 uint16_t portid; 1209 struct lcore_conf *qconf; 1210 struct lcore_rx_queue *rx_queue; 1211 enum freq_scale_hint_t lcore_scaleup_hint; 1212 uint32_t lcore_rx_idle_count = 0; 1213 uint32_t lcore_idle_hint = 0; 1214 int intr_en = 0; 1215 1216 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; 1217 1218 prev_tsc = 0; 1219 hz = rte_get_timer_hz(); 1220 tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND; 1221 1222 lcore_id = rte_lcore_id(); 1223 qconf = &lcore_conf[lcore_id]; 1224 1225 if (qconf->n_rx_queue == 0) { 1226 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); 1227 return 0; 1228 } 1229 1230 RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id); 1231 1232 for (i = 0; i < qconf->n_rx_queue; i++) { 1233 portid = qconf->rx_queue_list[i].port_id; 1234 queueid = qconf->rx_queue_list[i].queue_id; 1235 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1236 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1237 } 1238 1239 /* add into event wait list */ 1240 if (event_register(qconf) == 0) 1241 intr_en = 1; 1242 else 1243 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 1244 1245 while (!is_done()) { 1246 stats[lcore_id].nb_iteration_looped++; 1247 1248 cur_tsc = rte_rdtsc(); 1249 cur_tsc_power = cur_tsc; 1250 1251 /* 1252 * TX burst queue drain 1253 */ 1254 diff_tsc = cur_tsc - prev_tsc; 1255 if (unlikely(diff_tsc > drain_tsc)) { 1256 for (i = 0; i < qconf->n_tx_port; ++i) { 1257 portid = qconf->tx_port_id[i]; 1258 rte_eth_tx_buffer_flush(portid, 1259 qconf->tx_queue_id[portid], 1260 qconf->tx_buffer[portid]); 1261 } 1262 prev_tsc = cur_tsc; 1263 } 1264 1265 diff_tsc_power = cur_tsc_power - prev_tsc_power; 1266 if (diff_tsc_power > tim_res_tsc) { 1267 rte_timer_manage(); 1268 prev_tsc_power = cur_tsc_power; 1269 } 1270 1271 start_rx: 1272 /* 1273 * Read packet from RX queues 1274 */ 1275 lcore_scaleup_hint = FREQ_CURRENT; 1276 lcore_rx_idle_count = 0; 1277 for (i = 0; i < qconf->n_rx_queue; ++i) { 1278 rx_queue = &(qconf->rx_queue_list[i]); 1279 rx_queue->idle_hint = 0; 1280 portid = rx_queue->port_id; 1281 queueid = rx_queue->queue_id; 1282 1283 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1284 MAX_PKT_BURST); 1285 1286 stats[lcore_id].nb_rx_processed += nb_rx; 1287 if (unlikely(nb_rx == 0)) { 1288 /** 1289 * no packet 
received from rx queue, try to 1290 * sleep for a while forcing CPU enter deeper 1291 * C states. 1292 */ 1293 rx_queue->zero_rx_packet_count++; 1294 1295 if (rx_queue->zero_rx_packet_count <= 1296 MIN_ZERO_POLL_COUNT) 1297 continue; 1298 1299 rx_queue->idle_hint = power_idle_heuristic(\ 1300 rx_queue->zero_rx_packet_count); 1301 lcore_rx_idle_count++; 1302 } else { 1303 rx_queue->zero_rx_packet_count = 0; 1304 1305 /** 1306 * do not scale up frequency immediately as 1307 * user to kernel space communication is costly 1308 * which might impact packet I/O for received 1309 * packets. 1310 */ 1311 rx_queue->freq_up_hint = 1312 power_freq_scaleup_heuristic(lcore_id, 1313 portid, queueid); 1314 } 1315 1316 /* Prefetch first packets */ 1317 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1318 rte_prefetch0(rte_pktmbuf_mtod( 1319 pkts_burst[j], void *)); 1320 } 1321 1322 /* Prefetch and forward already prefetched packets */ 1323 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1324 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1325 j + PREFETCH_OFFSET], void *)); 1326 l3fwd_simple_forward(pkts_burst[j], portid, 1327 qconf); 1328 } 1329 1330 /* Forward remaining prefetched packets */ 1331 for (; j < nb_rx; j++) { 1332 l3fwd_simple_forward(pkts_burst[j], portid, 1333 qconf); 1334 } 1335 } 1336 1337 if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) { 1338 for (i = 1, lcore_scaleup_hint = 1339 qconf->rx_queue_list[0].freq_up_hint; 1340 i < qconf->n_rx_queue; ++i) { 1341 rx_queue = &(qconf->rx_queue_list[i]); 1342 if (rx_queue->freq_up_hint > 1343 lcore_scaleup_hint) 1344 lcore_scaleup_hint = 1345 rx_queue->freq_up_hint; 1346 } 1347 1348 if (lcore_scaleup_hint == FREQ_HIGHEST) { 1349 if (rte_power_freq_max) 1350 rte_power_freq_max(lcore_id); 1351 } else if (lcore_scaleup_hint == FREQ_HIGHER) { 1352 if (rte_power_freq_up) 1353 rte_power_freq_up(lcore_id); 1354 } 1355 } else { 1356 /** 1357 * All Rx queues empty in recent consecutive polls, 1358 * sleep in a conservative manner, meaning sleep as 1359 * less as possible. 1360 */ 1361 for (i = 1, lcore_idle_hint = 1362 qconf->rx_queue_list[0].idle_hint; 1363 i < qconf->n_rx_queue; ++i) { 1364 rx_queue = &(qconf->rx_queue_list[i]); 1365 if (rx_queue->idle_hint < lcore_idle_hint) 1366 lcore_idle_hint = rx_queue->idle_hint; 1367 } 1368 1369 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1370 /** 1371 * execute "pause" instruction to avoid context 1372 * switch which generally take hundred of 1373 * microseconds for short sleep. 
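			 * rte_delay_us() busy-waits in user space by default
			 * rather than asking the kernel to sleep, so a 1 us
			 * nap stays far cheaper than a sleep/wakeup cycle.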
1374 */ 1375 rte_delay_us(lcore_idle_hint); 1376 else { 1377 /* suspend until rx interrupt triggers */ 1378 if (intr_en) { 1379 turn_on_off_intr(qconf, 1); 1380 sleep_until_rx_interrupt( 1381 qconf->n_rx_queue, 1382 lcore_id); 1383 turn_on_off_intr(qconf, 0); 1384 /** 1385 * start receiving packets immediately 1386 */ 1387 if (likely(!is_done())) 1388 goto start_rx; 1389 } 1390 } 1391 stats[lcore_id].sleep_time += lcore_idle_hint; 1392 } 1393 } 1394 1395 return 0; 1396 } 1397 1398 static int 1399 check_lcore_params(void) 1400 { 1401 uint8_t queue, lcore; 1402 uint16_t i; 1403 int socketid; 1404 1405 for (i = 0; i < nb_lcore_params; ++i) { 1406 queue = lcore_params[i].queue_id; 1407 if (queue >= MAX_RX_QUEUE_PER_PORT) { 1408 printf("invalid queue number: %hhu\n", queue); 1409 return -1; 1410 } 1411 lcore = lcore_params[i].lcore_id; 1412 if (!rte_lcore_is_enabled(lcore)) { 1413 printf("error: lcore %hhu is not enabled in lcore " 1414 "mask\n", lcore); 1415 return -1; 1416 } 1417 if ((socketid = rte_lcore_to_socket_id(lcore) != 0) && 1418 (numa_on == 0)) { 1419 printf("warning: lcore %hhu is on socket %d with numa " 1420 "off\n", lcore, socketid); 1421 } 1422 if (app_mode == APP_MODE_TELEMETRY && lcore == rte_lcore_id()) { 1423 printf("cannot enable main core %d in config for telemetry mode\n", 1424 rte_lcore_id()); 1425 return -1; 1426 } 1427 } 1428 return 0; 1429 } 1430 1431 static int 1432 check_port_config(void) 1433 { 1434 unsigned portid; 1435 uint16_t i; 1436 1437 for (i = 0; i < nb_lcore_params; ++i) { 1438 portid = lcore_params[i].port_id; 1439 if ((enabled_port_mask & (1 << portid)) == 0) { 1440 printf("port %u is not enabled in port mask\n", 1441 portid); 1442 return -1; 1443 } 1444 if (!rte_eth_dev_is_valid_port(portid)) { 1445 printf("port %u is not present on the board\n", 1446 portid); 1447 return -1; 1448 } 1449 } 1450 return 0; 1451 } 1452 1453 static uint8_t 1454 get_port_n_rx_queues(const uint16_t port) 1455 { 1456 int queue = -1; 1457 uint16_t i; 1458 1459 for (i = 0; i < nb_lcore_params; ++i) { 1460 if (lcore_params[i].port_id == port && 1461 lcore_params[i].queue_id > queue) 1462 queue = lcore_params[i].queue_id; 1463 } 1464 return (uint8_t)(++queue); 1465 } 1466 1467 static int 1468 init_lcore_rx_queues(void) 1469 { 1470 uint16_t i, nb_rx_queue; 1471 uint8_t lcore; 1472 1473 for (i = 0; i < nb_lcore_params; ++i) { 1474 lcore = lcore_params[i].lcore_id; 1475 nb_rx_queue = lcore_conf[lcore].n_rx_queue; 1476 if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { 1477 printf("error: too many queues (%u) for lcore: %u\n", 1478 (unsigned)nb_rx_queue + 1, (unsigned)lcore); 1479 return -1; 1480 } else { 1481 lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = 1482 lcore_params[i].port_id; 1483 lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = 1484 lcore_params[i].queue_id; 1485 lcore_conf[lcore].n_rx_queue++; 1486 } 1487 } 1488 return 0; 1489 } 1490 1491 /* display usage */ 1492 static void 1493 print_usage(const char *prgname) 1494 { 1495 printf ("%s [EAL options] -- -p PORTMASK -P" 1496 " [--config (port,queue,lcore)[,(port,queue,lcore]]" 1497 " [--high-perf-cores CORELIST" 1498 " [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index]]" 1499 " [--max-pkt-len PKTLEN]\n" 1500 " -p PORTMASK: hexadecimal bitmask of ports to configure\n" 1501 " -P: enable promiscuous mode\n" 1502 " -u: set min/max frequency for uncore to minimum value\n" 1503 " -U: set min/max frequency for uncore to maximum value\n" 1504 " -i (frequency index): set min/max frequency for 
uncore to specified frequency index\n" 1505 " --config (port,queue,lcore): rx queues configuration\n" 1506 " --high-perf-cores CORELIST: list of high performance cores\n" 1507 " --perf-config: similar as config, cores specified as indices" 1508 " for bins containing high or regular performance cores\n" 1509 " --no-numa: optional, disable numa awareness\n" 1510 " --max-pkt-len PKTLEN: maximum packet length in decimal (64-9600)\n" 1511 " --parse-ptype: parse packet type by software\n" 1512 " --legacy: use legacy interrupt-based scaling\n" 1513 " --telemetry: enable telemetry mode, to update" 1514 " empty polls, full polls, and core busyness to telemetry\n" 1515 " --interrupt-only: enable interrupt-only mode\n" 1516 " --pmd-mgmt MODE: enable PMD power management mode. " 1517 "Currently supported modes: baseline, monitor, pause, scale\n" 1518 " --max-empty-polls MAX_EMPTY_POLLS: number of empty polls to" 1519 " wait before entering sleep state\n" 1520 " --pause-duration DURATION: set the duration, in microseconds," 1521 " of the pause callback\n" 1522 " --scale-freq-min FREQ_MIN: set minimum frequency for scaling mode for" 1523 " all application lcores (FREQ_MIN must be in kHz, in increments of 100MHz)\n" 1524 " --scale-freq-max FREQ_MAX: set maximum frequency for scaling mode for" 1525 " all application lcores (FREQ_MAX must be in kHz, in increments of 100MHz)\n", 1526 prgname); 1527 } 1528 1529 static int 1530 parse_int(const char *opt) 1531 { 1532 char *end = NULL; 1533 unsigned long val; 1534 1535 /* parse integer string */ 1536 val = strtoul(opt, &end, 10); 1537 if ((opt[0] == '\0') || (end == NULL) || (*end != '\0')) 1538 return -1; 1539 1540 return val; 1541 } 1542 1543 static int parse_max_pkt_len(const char *pktlen) 1544 { 1545 char *end = NULL; 1546 unsigned long len; 1547 1548 /* parse decimal string */ 1549 len = strtoul(pktlen, &end, 10); 1550 if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0')) 1551 return -1; 1552 1553 if (len == 0) 1554 return -1; 1555 1556 return len; 1557 } 1558 1559 static int 1560 parse_uncore_options(enum uncore_choice choice, const char *argument) 1561 { 1562 unsigned int die, pkg, max_pkg, max_die; 1563 int ret = 0; 1564 max_pkg = rte_power_uncore_get_num_pkgs(); 1565 if (max_pkg == 0) 1566 return -1; 1567 1568 for (pkg = 0; pkg < max_pkg; pkg++) { 1569 max_die = rte_power_uncore_get_num_dies(pkg); 1570 if (max_die == 0) 1571 return -1; 1572 for (die = 0; die < max_die; die++) { 1573 ret = rte_power_uncore_init(pkg, die); 1574 if (ret == -1) { 1575 RTE_LOG(INFO, L3FWD_POWER, "Unable to initialize uncore for pkg %02u die %02u\n" 1576 , pkg, die); 1577 return ret; 1578 } 1579 if (choice == UNCORE_MIN) { 1580 ret = rte_power_uncore_freq_min(pkg, die); 1581 if (ret == -1) { 1582 RTE_LOG(INFO, L3FWD_POWER, 1583 "Unable to set the uncore min/max to minimum uncore frequency value for pkg %02u die %02u\n" 1584 , pkg, die); 1585 return ret; 1586 } 1587 } else if (choice == UNCORE_MAX) { 1588 ret = rte_power_uncore_freq_max(pkg, die); 1589 if (ret == -1) { 1590 RTE_LOG(INFO, L3FWD_POWER, 1591 "Unable to set uncore min/max to maximum uncore frequency value for pkg %02u die %02u\n" 1592 , pkg, die); 1593 return ret; 1594 } 1595 } else if (choice == UNCORE_IDX) { 1596 char *ptr = NULL; 1597 int frequency_index = strtol(argument, &ptr, 10); 1598 if (argument == ptr) { 1599 RTE_LOG(INFO, L3FWD_POWER, "Index given is not a valid number."); 1600 return -1; 1601 } 1602 int freq_array_len = rte_power_uncore_get_num_freqs(pkg, die); 1603 if (frequency_index > 
freq_array_len - 1) { 1604 RTE_LOG(INFO, L3FWD_POWER, 1605 "Frequency index given out of range, please choose a value from 0 to %d.\n", 1606 freq_array_len); 1607 return -1; 1608 } 1609 ret = rte_power_set_uncore_freq(pkg, die, frequency_index); 1610 if (ret == -1) { 1611 RTE_LOG(INFO, L3FWD_POWER, 1612 "Unable to set min/max uncore index value for pkg %02u die %02u\n", 1613 pkg, die); 1614 return ret; 1615 } 1616 } else { 1617 RTE_LOG(INFO, L3FWD_POWER, "Uncore choice provided invalid\n"); 1618 return -1; 1619 } 1620 } 1621 } 1622 1623 RTE_LOG(INFO, L3FWD_POWER, "Successfully set max/min/index uncore frequency.\n"); 1624 return ret; 1625 } 1626 1627 static int 1628 parse_portmask(const char *portmask) 1629 { 1630 char *end = NULL; 1631 unsigned long pm; 1632 1633 /* parse hexadecimal string */ 1634 pm = strtoul(portmask, &end, 16); 1635 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 1636 return 0; 1637 1638 return pm; 1639 } 1640 1641 static int 1642 parse_config(const char *q_arg) 1643 { 1644 char s[256]; 1645 const char *p, *p0 = q_arg; 1646 char *end; 1647 enum fieldnames { 1648 FLD_PORT = 0, 1649 FLD_QUEUE, 1650 FLD_LCORE, 1651 _NUM_FLD 1652 }; 1653 unsigned long int_fld[_NUM_FLD]; 1654 char *str_fld[_NUM_FLD]; 1655 int i; 1656 unsigned size; 1657 1658 nb_lcore_params = 0; 1659 1660 while ((p = strchr(p0,'(')) != NULL) { 1661 ++p; 1662 if((p0 = strchr(p,')')) == NULL) 1663 return -1; 1664 1665 size = p0 - p; 1666 if(size >= sizeof(s)) 1667 return -1; 1668 1669 snprintf(s, sizeof(s), "%.*s", size, p); 1670 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != 1671 _NUM_FLD) 1672 return -1; 1673 for (i = 0; i < _NUM_FLD; i++){ 1674 errno = 0; 1675 int_fld[i] = strtoul(str_fld[i], &end, 0); 1676 if (errno != 0 || end == str_fld[i] || int_fld[i] > 1677 255) 1678 return -1; 1679 } 1680 if (nb_lcore_params >= MAX_LCORE_PARAMS) { 1681 printf("exceeded max number of lcore params: %hu\n", 1682 nb_lcore_params); 1683 return -1; 1684 } 1685 lcore_params_array[nb_lcore_params].port_id = 1686 (uint8_t)int_fld[FLD_PORT]; 1687 lcore_params_array[nb_lcore_params].queue_id = 1688 (uint8_t)int_fld[FLD_QUEUE]; 1689 lcore_params_array[nb_lcore_params].lcore_id = 1690 (uint8_t)int_fld[FLD_LCORE]; 1691 ++nb_lcore_params; 1692 } 1693 lcore_params = lcore_params_array; 1694 1695 return 0; 1696 } 1697 1698 static int 1699 parse_pmd_mgmt_config(const char *name) 1700 { 1701 #define PMD_MGMT_MONITOR "monitor" 1702 #define PMD_MGMT_PAUSE "pause" 1703 #define PMD_MGMT_SCALE "scale" 1704 #define PMD_MGMT_BASELINE "baseline" 1705 1706 if (strncmp(PMD_MGMT_MONITOR, name, sizeof(PMD_MGMT_MONITOR)) == 0) { 1707 pmgmt_type = RTE_POWER_MGMT_TYPE_MONITOR; 1708 return 0; 1709 } 1710 1711 if (strncmp(PMD_MGMT_PAUSE, name, sizeof(PMD_MGMT_PAUSE)) == 0) { 1712 pmgmt_type = RTE_POWER_MGMT_TYPE_PAUSE; 1713 return 0; 1714 } 1715 1716 if (strncmp(PMD_MGMT_SCALE, name, sizeof(PMD_MGMT_SCALE)) == 0) { 1717 pmgmt_type = RTE_POWER_MGMT_TYPE_SCALE; 1718 return 0; 1719 } 1720 if (strncmp(PMD_MGMT_BASELINE, name, sizeof(PMD_MGMT_BASELINE)) == 0) { 1721 baseline_enabled = true; 1722 return 0; 1723 } 1724 /* unknown PMD power management mode */ 1725 return -1; 1726 } 1727 1728 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" 1729 #define CMD_LINE_OPT_LEGACY "legacy" 1730 #define CMD_LINE_OPT_INTERRUPT_ONLY "interrupt-only" 1731 #define CMD_LINE_OPT_TELEMETRY "telemetry" 1732 #define CMD_LINE_OPT_PMD_MGMT "pmd-mgmt" 1733 #define CMD_LINE_OPT_MAX_PKT_LEN "max-pkt-len" 1734 #define CMD_LINE_OPT_MAX_EMPTY_POLLS 
"max-empty-polls" 1735 #define CMD_LINE_OPT_PAUSE_DURATION "pause-duration" 1736 #define CMD_LINE_OPT_SCALE_FREQ_MIN "scale-freq-min" 1737 #define CMD_LINE_OPT_SCALE_FREQ_MAX "scale-freq-max" 1738 1739 /* Parse the argument given in the command line of the application */ 1740 static int 1741 parse_args(int argc, char **argv) 1742 { 1743 int opt, ret; 1744 char **argvopt; 1745 int option_index; 1746 char *prgname = argv[0]; 1747 static struct option lgopts[] = { 1748 {"config", 1, 0, 0}, 1749 {"perf-config", 1, 0, 0}, 1750 {"high-perf-cores", 1, 0, 0}, 1751 {"no-numa", 0, 0, 0}, 1752 {CMD_LINE_OPT_MAX_PKT_LEN, 1, 0, 0}, 1753 {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, 1754 {CMD_LINE_OPT_LEGACY, 0, 0, 0}, 1755 {CMD_LINE_OPT_TELEMETRY, 0, 0, 0}, 1756 {CMD_LINE_OPT_INTERRUPT_ONLY, 0, 0, 0}, 1757 {CMD_LINE_OPT_PMD_MGMT, 1, 0, 0}, 1758 {CMD_LINE_OPT_MAX_EMPTY_POLLS, 1, 0, 0}, 1759 {CMD_LINE_OPT_PAUSE_DURATION, 1, 0, 0}, 1760 {CMD_LINE_OPT_SCALE_FREQ_MIN, 1, 0, 0}, 1761 {CMD_LINE_OPT_SCALE_FREQ_MAX, 1, 0, 0}, 1762 {NULL, 0, 0, 0} 1763 }; 1764 1765 argvopt = argv; 1766 1767 while ((opt = getopt_long(argc, argvopt, "p:PuUi:", 1768 lgopts, &option_index)) != EOF) { 1769 1770 switch (opt) { 1771 /* portmask */ 1772 case 'p': 1773 enabled_port_mask = parse_portmask(optarg); 1774 if (enabled_port_mask == 0) { 1775 printf("invalid portmask\n"); 1776 print_usage(prgname); 1777 return -1; 1778 } 1779 break; 1780 case 'P': 1781 printf("Promiscuous mode selected\n"); 1782 promiscuous_on = 1; 1783 break; 1784 case 'u': 1785 enabled_uncore = parse_uncore_options(UNCORE_MIN, NULL); 1786 if (enabled_uncore < 0) { 1787 print_usage(prgname); 1788 return -1; 1789 } 1790 break; 1791 case 'U': 1792 enabled_uncore = parse_uncore_options(UNCORE_MAX, NULL); 1793 if (enabled_uncore < 0) { 1794 print_usage(prgname); 1795 return -1; 1796 } 1797 break; 1798 case 'i': 1799 enabled_uncore = parse_uncore_options(UNCORE_IDX, optarg); 1800 if (enabled_uncore < 0) { 1801 print_usage(prgname); 1802 return -1; 1803 } 1804 break; 1805 /* long options */ 1806 case 0: 1807 if (!strncmp(lgopts[option_index].name, "config", 6)) { 1808 ret = parse_config(optarg); 1809 if (ret) { 1810 printf("invalid config\n"); 1811 print_usage(prgname); 1812 return -1; 1813 } 1814 } 1815 1816 if (!strncmp(lgopts[option_index].name, 1817 "perf-config", 11)) { 1818 ret = parse_perf_config(optarg); 1819 if (ret) { 1820 printf("invalid perf-config\n"); 1821 print_usage(prgname); 1822 return -1; 1823 } 1824 } 1825 1826 if (!strncmp(lgopts[option_index].name, 1827 "high-perf-cores", 15)) { 1828 ret = parse_perf_core_list(optarg); 1829 if (ret) { 1830 printf("invalid high-perf-cores\n"); 1831 print_usage(prgname); 1832 return -1; 1833 } 1834 } 1835 1836 if (!strncmp(lgopts[option_index].name, 1837 "no-numa", 7)) { 1838 printf("numa is disabled \n"); 1839 numa_on = 0; 1840 } 1841 1842 if (!strncmp(lgopts[option_index].name, 1843 CMD_LINE_OPT_LEGACY, 1844 sizeof(CMD_LINE_OPT_LEGACY))) { 1845 if (app_mode != APP_MODE_DEFAULT) { 1846 printf(" legacy mode is mutually exclusive with other modes\n"); 1847 return -1; 1848 } 1849 app_mode = APP_MODE_LEGACY; 1850 printf("legacy mode is enabled\n"); 1851 } 1852 1853 if (!strncmp(lgopts[option_index].name, 1854 CMD_LINE_OPT_TELEMETRY, 1855 sizeof(CMD_LINE_OPT_TELEMETRY))) { 1856 if (app_mode != APP_MODE_DEFAULT) { 1857 printf(" telemetry mode is mutually exclusive with other modes\n"); 1858 return -1; 1859 } 1860 app_mode = APP_MODE_TELEMETRY; 1861 printf("telemetry mode is enabled\n"); 1862 } 1863 1864 if 
(!strncmp(lgopts[option_index].name, 1865 CMD_LINE_OPT_PMD_MGMT, 1866 sizeof(CMD_LINE_OPT_PMD_MGMT))) { 1867 if (app_mode != APP_MODE_DEFAULT) { 1868 printf(" power mgmt mode is mutually exclusive with other modes\n"); 1869 return -1; 1870 } 1871 if (parse_pmd_mgmt_config(optarg) < 0) { 1872 printf(" Invalid PMD power management mode: %s\n", 1873 optarg); 1874 return -1; 1875 } 1876 app_mode = APP_MODE_PMD_MGMT; 1877 printf("PMD power mgmt mode is enabled\n"); 1878 } 1879 if (!strncmp(lgopts[option_index].name, 1880 CMD_LINE_OPT_INTERRUPT_ONLY, 1881 sizeof(CMD_LINE_OPT_INTERRUPT_ONLY))) { 1882 if (app_mode != APP_MODE_DEFAULT) { 1883 printf(" interrupt-only mode is mutually exclusive with other modes\n"); 1884 return -1; 1885 } 1886 app_mode = APP_MODE_INTERRUPT; 1887 printf("interrupt-only mode is enabled\n"); 1888 } 1889 1890 if (!strncmp(lgopts[option_index].name, 1891 CMD_LINE_OPT_MAX_PKT_LEN, 1892 sizeof(CMD_LINE_OPT_MAX_PKT_LEN))) { 1893 printf("Custom frame size is configured\n"); 1894 max_pkt_len = parse_max_pkt_len(optarg); 1895 } 1896 1897 if (!strncmp(lgopts[option_index].name, 1898 CMD_LINE_OPT_PARSE_PTYPE, 1899 sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { 1900 printf("soft parse-ptype is enabled\n"); 1901 parse_ptype = 1; 1902 } 1903 1904 if (!strncmp(lgopts[option_index].name, 1905 CMD_LINE_OPT_MAX_EMPTY_POLLS, 1906 sizeof(CMD_LINE_OPT_MAX_EMPTY_POLLS))) { 1907 printf("Maximum empty polls configured\n"); 1908 max_empty_polls = parse_int(optarg); 1909 } 1910 1911 if (!strncmp(lgopts[option_index].name, 1912 CMD_LINE_OPT_PAUSE_DURATION, 1913 sizeof(CMD_LINE_OPT_PAUSE_DURATION))) { 1914 printf("Pause duration configured\n"); 1915 pause_duration = parse_int(optarg); 1916 } 1917 1918 if (!strncmp(lgopts[option_index].name, 1919 CMD_LINE_OPT_SCALE_FREQ_MIN, 1920 sizeof(CMD_LINE_OPT_SCALE_FREQ_MIN))) { 1921 printf("Scaling frequency minimum configured\n"); 1922 scale_freq_min = parse_int(optarg); 1923 } 1924 1925 if (!strncmp(lgopts[option_index].name, 1926 CMD_LINE_OPT_SCALE_FREQ_MAX, 1927 sizeof(CMD_LINE_OPT_SCALE_FREQ_MAX))) { 1928 printf("Scaling frequency maximum configured\n"); 1929 scale_freq_max = parse_int(optarg); 1930 } 1931 1932 break; 1933 1934 default: 1935 print_usage(prgname); 1936 return -1; 1937 } 1938 } 1939 1940 if (optind >= 0) 1941 argv[optind-1] = prgname; 1942 1943 ret = optind-1; 1944 optind = 1; /* reset getopt lib */ 1945 return ret; 1946 } 1947 1948 static void 1949 print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr) 1950 { 1951 char buf[RTE_ETHER_ADDR_FMT_SIZE]; 1952 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr); 1953 printf("%s%s", name, buf); 1954 } 1955 1956 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1957 static void 1958 setup_hash(int socketid) 1959 { 1960 struct rte_hash_parameters ipv4_l3fwd_hash_params = { 1961 .name = NULL, 1962 .entries = L3FWD_HASH_ENTRIES, 1963 .key_len = sizeof(struct ipv4_5tuple), 1964 .hash_func = DEFAULT_HASH_FUNC, 1965 .hash_func_init_val = 0, 1966 }; 1967 1968 struct rte_hash_parameters ipv6_l3fwd_hash_params = { 1969 .name = NULL, 1970 .entries = L3FWD_HASH_ENTRIES, 1971 .key_len = sizeof(struct ipv6_5tuple), 1972 .hash_func = DEFAULT_HASH_FUNC, 1973 .hash_func_init_val = 0, 1974 }; 1975 1976 unsigned i; 1977 int ret; 1978 char s[64]; 1979 1980 /* create ipv4 hash */ 1981 snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); 1982 ipv4_l3fwd_hash_params.name = s; 1983 ipv4_l3fwd_hash_params.socket_id = socketid; 1984 ipv4_l3fwd_lookup_struct[socketid] = 1985 
rte_hash_create(&ipv4_l3fwd_hash_params); 1986 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 1987 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 1988 "socket %d\n", socketid); 1989 1990 /* create ipv6 hash */ 1991 snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); 1992 ipv6_l3fwd_hash_params.name = s; 1993 ipv6_l3fwd_hash_params.socket_id = socketid; 1994 ipv6_l3fwd_lookup_struct[socketid] = 1995 rte_hash_create(&ipv6_l3fwd_hash_params); 1996 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 1997 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 1998 "socket %d\n", socketid); 1999 2000 2001 /* populate the ipv4 hash */ 2002 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 2003 ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid], 2004 (void *) &ipv4_l3fwd_route_array[i].key); 2005 if (ret < 0) { 2006 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 2007 "l3fwd hash on socket %d\n", i, socketid); 2008 } 2009 ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out; 2010 printf("Hash: Adding key\n"); 2011 print_ipv4_key(ipv4_l3fwd_route_array[i].key); 2012 } 2013 2014 /* populate the ipv6 hash */ 2015 for (i = 0; i < RTE_DIM(ipv6_l3fwd_route_array); i++) { 2016 ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid], 2017 (void *) &ipv6_l3fwd_route_array[i].key); 2018 if (ret < 0) { 2019 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 2020 "l3fwd hash on socket %d\n", i, socketid); 2021 } 2022 ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out; 2023 printf("Hash: Adding key\n"); 2024 print_ipv6_key(ipv6_l3fwd_route_array[i].key); 2025 } 2026 } 2027 #endif 2028 2029 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 2030 static void 2031 setup_lpm(int socketid) 2032 { 2033 unsigned i; 2034 int ret; 2035 char s[64]; 2036 2037 /* create the LPM table */ 2038 struct rte_lpm_config lpm_ipv4_config; 2039 2040 lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; 2041 lpm_ipv4_config.number_tbl8s = 256; 2042 lpm_ipv4_config.flags = 0; 2043 2044 snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); 2045 ipv4_l3fwd_lookup_struct[socketid] = 2046 rte_lpm_create(s, socketid, &lpm_ipv4_config); 2047 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 2048 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 2049 " on socket %d\n", socketid); 2050 2051 /* populate the LPM table */ 2052 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 2053 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], 2054 ipv4_l3fwd_route_array[i].ip, 2055 ipv4_l3fwd_route_array[i].depth, 2056 ipv4_l3fwd_route_array[i].if_out); 2057 2058 if (ret < 0) { 2059 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 2060 "l3fwd LPM table on socket %d\n", 2061 i, socketid); 2062 } 2063 2064 printf("LPM: Adding route 0x%08x / %d (%d)\n", 2065 (unsigned)ipv4_l3fwd_route_array[i].ip, 2066 ipv4_l3fwd_route_array[i].depth, 2067 ipv4_l3fwd_route_array[i].if_out); 2068 } 2069 } 2070 #endif 2071 2072 static int 2073 init_mem(unsigned nb_mbuf) 2074 { 2075 struct lcore_conf *qconf; 2076 int socketid; 2077 unsigned lcore_id; 2078 char s[64]; 2079 2080 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2081 if (rte_lcore_is_enabled(lcore_id) == 0) 2082 continue; 2083 2084 if (numa_on) 2085 socketid = rte_lcore_to_socket_id(lcore_id); 2086 else 2087 socketid = 0; 2088 2089 if (socketid >= NB_SOCKETS) { 2090 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is " 2091 "out of range %d\n", socketid, 2092 lcore_id, NB_SOCKETS); 2093 } 2094 if (pktmbuf_pool[socketid] == NULL) { 
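			/*
			 * First enabled lcore seen on this socket: create the
			 * per-socket mbuf pool and the per-socket lookup table
			 * (LPM or hash). Later lcores on the same socket reuse
			 * them and only pick up the pointers further below.
			 */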
			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
			pktmbuf_pool[socketid] =
				rte_pktmbuf_pool_create(s, nb_mbuf,
					MEMPOOL_CACHE_SIZE, 0,
					RTE_MBUF_DEFAULT_BUF_SIZE,
					socketid);
			if (pktmbuf_pool[socketid] == NULL)
				rte_exit(EXIT_FAILURE,
					"Cannot init mbuf pool on socket %d\n",
					socketid);
			else
				printf("Allocated mbuf pool on socket %d\n",
					socketid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
			setup_lpm(socketid);
#else
			setup_hash(socketid);
#endif
		}
		qconf = &lcore_conf[lcore_id];
		qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
#endif
	}
	return 0;
}

/* Check the link status of all enabled ports for up to 9s, printing it at the end */
static void
check_all_ports_link_status(uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
	uint8_t count, all_ports_up, print_flag = 0;
	uint16_t portid;
	struct rte_eth_link link;
	int ret;
	char link_status_text[RTE_ETH_LINK_MAX_STR_LEN];

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		RTE_ETH_FOREACH_DEV(portid) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			ret = rte_eth_link_get_nowait(portid, &link);
			if (ret < 0) {
				all_ports_up = 0;
				if (print_flag == 1)
					printf("Port %u link get failed: %s\n",
						portid, rte_strerror(-ret));
				continue;
			}
			/* print link status if flag set */
			if (print_flag == 1) {
				rte_eth_link_to_str(link_status_text,
					sizeof(link_status_text), &link);
				printf("Port %d %s\n", portid,
					link_status_text);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == RTE_ETH_LINK_DOWN) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}

static int check_ptype(uint16_t portid)
{
	int i, ret;
	int ptype_l3_ipv4 = 0;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	int ptype_l3_ipv6 = 0;
#endif
	uint32_t ptype_mask = RTE_PTYPE_L3_MASK;

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
	if (ret <= 0)
		return 0;

	uint32_t ptypes[ret];

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
	for (i = 0; i < ret; ++i) {
		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
			ptype_l3_ipv4 = 1;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
			ptype_l3_ipv6 = 1;
#endif
	}

	if (ptype_l3_ipv4 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	if (ptype_l3_ipv6 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
#endif

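	/*
	 * The LPM path only forwards IPv4, so recognizing the IPv4 ptype is
	 * sufficient; the exact-match path needs both IPv4 and IPv6.
	 */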
#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
	if (ptype_l3_ipv4)
#else /* APP_LOOKUP_EXACT_MATCH */
	if (ptype_l3_ipv4 && ptype_l3_ipv6)
#endif
		return 1;

	return 0;

}

static int
init_power_library(void)
{
	enum power_management_env env;
	unsigned int lcore_id;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* init power management library */
		ret = rte_power_init(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library initialization failed on core %u\n",
				lcore_id);
			return ret;
		}
		/* we're not supporting the VM channel mode */
		env = rte_power_get_env();
		if (env != PM_ENV_ACPI_CPUFREQ &&
				env != PM_ENV_PSTATE_CPUFREQ) {
			RTE_LOG(ERR, POWER,
				"Only ACPI and PSTATE modes are supported\n");
			return -1;
		}
	}
	return ret;
}

static int
deinit_power_library(void)
{
	unsigned int lcore_id, max_pkg, max_die, die, pkg;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* deinit power management library */
		ret = rte_power_exit(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library deinitialization failed on core %u\n",
				lcore_id);
			return ret;
		}
	}

	/* if uncore option was set */
	if (enabled_uncore == 0) {
		max_pkg = rte_power_uncore_get_num_pkgs();
		if (max_pkg == 0)
			return -1;
		for (pkg = 0; pkg < max_pkg; pkg++) {
			max_die = rte_power_uncore_get_num_dies(pkg);
			if (max_die == 0)
				return -1;
			for (die = 0; die < max_die; die++) {
				ret = rte_power_uncore_exit(pkg, die);
				if (ret < 0) {
					RTE_LOG(ERR, L3FWD_POWER,
						"Uncore deinit failed for pkg %02u die %02u\n",
						pkg, die);
					return -1;
				}
			}
		}
	}
	return ret;
}

static void
get_current_stat_values(uint64_t *values)
{
	unsigned int lcore_id = rte_lcore_id();
	struct lcore_conf *qconf;
	uint64_t app_eps = 0, app_fps = 0, app_br = 0;
	uint64_t count = 0;

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		qconf = &lcore_conf[lcore_id];
		if (qconf->n_rx_queue == 0)
			continue;
		count++;
		rte_spinlock_lock(&stats[lcore_id].telemetry_lock);
		app_eps += stats[lcore_id].ep_nep[1];
		app_fps += stats[lcore_id].fp_nfp[1];
		app_br += stats[lcore_id].br;
		rte_spinlock_unlock(&stats[lcore_id].telemetry_lock);
	}

	if (count > 0) {
		values[0] = app_eps/count;
		values[1] = app_fps/count;
		values[2] = app_br/count;
	} else
		memset(values, 0, sizeof(uint64_t) * NUM_TELSTATS);

}

static void
update_telemetry(__rte_unused struct rte_timer *tim,
		__rte_unused void *arg)
{
	int ret;
	uint64_t values[NUM_TELSTATS] = {0};

	get_current_stat_values(values);
	ret = rte_metrics_update_values(RTE_METRICS_GLOBAL, telstats_index,
					values, RTE_DIM(values));
	if (ret < 0)
		RTE_LOG(WARNING, POWER, "failed to update metrics\n");
}

static int
handle_app_stats(const char *cmd __rte_unused,
		const char *params __rte_unused,
		struct rte_tel_data *d)
{
	uint64_t values[NUM_TELSTATS] = {0};
	uint32_t i;

	rte_tel_data_start_dict(d);
	get_current_stat_values(values);
	for (i = 0; i < NUM_TELSTATS; i++)
		rte_tel_data_add_dict_uint(d, telstats_strings[i].name,
				values[i]);
	return 0;
}

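/*
 * Arm the telemetry timer as a PERIODICAL timer on the calling lcore: with
 * TELEMETRY_INTERVALS_PER_SEC == 2 the update_telemetry() callback fires
 * about every 500 ms, driven by the rte_timer_manage() calls that
 * launch_timer() issues roughly every 10 ms.
 */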
static void
telemetry_setup_timer(void)
{
	int lcore_id = rte_lcore_id();
	uint64_t hz = rte_get_timer_hz();
	uint64_t ticks;

	ticks = hz / TELEMETRY_INTERVALS_PER_SEC;
	rte_timer_reset_sync(&telemetry_timer,
			ticks,
			PERIODICAL,
			lcore_id,
			update_telemetry,
			NULL);
}

static int
launch_timer(unsigned int lcore_id)
{
	int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms;

	RTE_SET_USED(lcore_id);

	if (rte_get_main_lcore() != lcore_id) {
		rte_panic("timer on lcore:%d which is not main core:%d\n",
				lcore_id,
				rte_get_main_lcore());
	}

	RTE_LOG(INFO, POWER, "Bring up the Timer\n");

	telemetry_setup_timer();

	cycles_10ms = rte_get_timer_hz() / 100;

	while (!is_done()) {
		cur_tsc = rte_rdtsc();
		diff_tsc = cur_tsc - prev_tsc;
		if (diff_tsc > cycles_10ms) {
			rte_timer_manage();
			prev_tsc = cur_tsc;
			cycles_10ms = rte_get_timer_hz() / 100;
		}
	}

	RTE_LOG(INFO, POWER, "Timer subsystem is done\n");

	return 0;
}

static int
autodetect_mode(void)
{
	RTE_LOG(NOTICE, L3FWD_POWER, "Operating mode not specified, probing frequency scaling support...\n");

	/*
	 * Empty poll and telemetry modes have to be specifically requested to
	 * be enabled, but we can auto-detect between interrupt mode with or
	 * without frequency scaling. Both ACPI and pstate can be used.
	 */
	if (rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ))
		return APP_MODE_LEGACY;
	if (rte_power_check_env_supported(PM_ENV_PSTATE_CPUFREQ))
		return APP_MODE_LEGACY;

	RTE_LOG(NOTICE, L3FWD_POWER, "Frequency scaling not supported, selecting interrupt-only mode\n");

	return APP_MODE_INTERRUPT;
}

static const char *
mode_to_str(enum appmode mode)
{
	switch (mode) {
	case APP_MODE_LEGACY:
		return "legacy";
	case APP_MODE_TELEMETRY:
		return "telemetry";
	case APP_MODE_INTERRUPT:
		return "interrupt-only";
	case APP_MODE_PMD_MGMT:
		return "pmd mgmt";
	default:
		return "invalid";
	}
}

static uint32_t
eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
{
	uint32_t overhead_len;

	if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
		overhead_len = max_rx_pktlen - max_mtu;
	else
		overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;

	return overhead_len;
}

static int
config_port_max_pkt_len(struct rte_eth_conf *conf,
		struct rte_eth_dev_info *dev_info)
{
	uint32_t overhead_len;

	if (max_pkt_len == 0)
		return 0;

	if (max_pkt_len < RTE_ETHER_MIN_LEN || max_pkt_len > MAX_JUMBO_PKT_LEN)
		return -1;

	overhead_len = eth_dev_get_overhead_len(dev_info->max_rx_pktlen,
			dev_info->max_mtu);
	conf->rxmode.mtu = max_pkt_len - overhead_len;

	if (conf->rxmode.mtu > RTE_ETHER_MTU)
		conf->txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;

	return 0;
}

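/*
 * Worked example (illustrative numbers): if max_pkt_len is set to 9000 on the
 * command line and the PMD does not report a usable max_rx_pktlen/max_mtu
 * pair, the overhead defaults to RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN
 * (14 + 4 = 18 bytes), so the port MTU becomes 8982; since that exceeds
 * RTE_ETHER_MTU, multi-segment Tx offload is enabled as well.
 */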
/* Power library initialized in the main routine. 8< */
int
main(int argc, char **argv)
{
	struct lcore_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	int ret;
	uint16_t nb_ports;
	uint16_t queueid;
	unsigned lcore_id;
	uint64_t hz;
	uint32_t n_tx_queue, nb_lcores;
	uint32_t dev_rxq_num, dev_txq_num;
	uint8_t nb_rx_queue, queue, socketid;
	uint16_t portid;
	const char *ptr_strings[NUM_TELSTATS];

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
	argc -= ret;
	argv += ret;

	/* catch SIGINT and restore cpufreq governor to ondemand */
	signal(SIGINT, signal_exit_now);

	/* init RTE timer library to be used later */
	rte_timer_subsystem_init();

	/* if we're running pmd-mgmt mode, don't default to baseline mode */
	baseline_enabled = false;

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");

	if (app_mode == APP_MODE_DEFAULT)
		app_mode = autodetect_mode();

	RTE_LOG(INFO, L3FWD_POWER, "Selected operation mode: %s\n",
			mode_to_str(app_mode));

	/* only legacy mode relies on power library */
	if ((app_mode == APP_MODE_LEGACY) && init_power_library())
		rte_exit(EXIT_FAILURE, "init_power_library failed\n");

	if (update_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "update_lcore_params failed\n");

	if (check_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");

	ret = init_lcore_rx_queues();
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");

	nb_ports = rte_eth_dev_count_avail();

	if (check_port_config() < 0)
		rte_exit(EXIT_FAILURE, "check_port_config failed\n");

	nb_lcores = rte_lcore_count();

	/* initialize all ports */
	RTE_ETH_FOREACH_DEV(portid) {
		struct rte_eth_conf local_port_conf = port_conf;
		/* not all app modes need interrupts */
		bool need_intr = app_mode == APP_MODE_LEGACY ||
				app_mode == APP_MODE_INTERRUPT;

		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}

		/* init port */
		printf("Initializing port %d ... ", portid);
		fflush(stdout);

		ret = rte_eth_dev_info_get(portid, &dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				"Error during getting device (port %u) info: %s\n",
				portid, strerror(-ret));

		dev_rxq_num = dev_info.max_rx_queues;
		dev_txq_num = dev_info.max_tx_queues;

		nb_rx_queue = get_port_n_rx_queues(portid);
		if (nb_rx_queue > dev_rxq_num)
			rte_exit(EXIT_FAILURE,
				"Cannot configure unavailable rxq: "
				"port=%d\n", portid);

		n_tx_queue = nb_lcores;
		if (n_tx_queue > dev_txq_num)
			n_tx_queue = dev_txq_num;
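		/*
		 * One Tx queue per enabled lcore is requested, capped at what
		 * the device supports; lcores beyond dev_txq_num simply get no
		 * Tx queue of their own in the setup loop below.
		 */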
", 2579 nb_rx_queue, (unsigned)n_tx_queue ); 2580 /* If number of Rx queue is 0, no need to enable Rx interrupt */ 2581 if (nb_rx_queue == 0) 2582 need_intr = false; 2583 2584 if (need_intr) 2585 local_port_conf.intr_conf.rxq = 1; 2586 2587 ret = rte_eth_dev_info_get(portid, &dev_info); 2588 if (ret != 0) 2589 rte_exit(EXIT_FAILURE, 2590 "Error during getting device (port %u) info: %s\n", 2591 portid, strerror(-ret)); 2592 2593 ret = config_port_max_pkt_len(&local_port_conf, &dev_info); 2594 if (ret != 0) 2595 rte_exit(EXIT_FAILURE, 2596 "Invalid max packet length: %u (port %u)\n", 2597 max_pkt_len, portid); 2598 2599 if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) 2600 local_port_conf.txmode.offloads |= 2601 RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2602 2603 local_port_conf.rx_adv_conf.rss_conf.rss_hf &= 2604 dev_info.flow_type_rss_offloads; 2605 if (local_port_conf.rx_adv_conf.rss_conf.rss_hf != 2606 port_conf.rx_adv_conf.rss_conf.rss_hf) { 2607 printf("Port %u modified RSS hash function based on hardware support," 2608 "requested:%#"PRIx64" configured:%#"PRIx64"\n", 2609 portid, 2610 port_conf.rx_adv_conf.rss_conf.rss_hf, 2611 local_port_conf.rx_adv_conf.rss_conf.rss_hf); 2612 } 2613 2614 if (local_port_conf.rx_adv_conf.rss_conf.rss_hf == 0) 2615 local_port_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE; 2616 local_port_conf.rxmode.offloads &= dev_info.rx_offload_capa; 2617 port_conf.rxmode.offloads = local_port_conf.rxmode.offloads; 2618 2619 ret = rte_eth_dev_configure(portid, nb_rx_queue, 2620 (uint16_t)n_tx_queue, &local_port_conf); 2621 if (ret < 0) 2622 rte_exit(EXIT_FAILURE, "Cannot configure device: " 2623 "err=%d, port=%d\n", ret, portid); 2624 2625 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, 2626 &nb_txd); 2627 if (ret < 0) 2628 rte_exit(EXIT_FAILURE, 2629 "Cannot adjust number of descriptors: err=%d, port=%d\n", 2630 ret, portid); 2631 2632 ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 2633 if (ret < 0) 2634 rte_exit(EXIT_FAILURE, 2635 "Cannot get MAC address: err=%d, port=%d\n", 2636 ret, portid); 2637 2638 print_ethaddr(" Address:", &ports_eth_addr[portid]); 2639 printf(", "); 2640 2641 /* init memory */ 2642 ret = init_mem(NB_MBUF); 2643 if (ret < 0) 2644 rte_exit(EXIT_FAILURE, "init_mem failed\n"); 2645 2646 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2647 if (rte_lcore_is_enabled(lcore_id) == 0) 2648 continue; 2649 2650 /* Initialize TX buffers */ 2651 qconf = &lcore_conf[lcore_id]; 2652 qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", 2653 RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, 2654 rte_eth_dev_socket_id(portid)); 2655 if (qconf->tx_buffer[portid] == NULL) 2656 rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n", 2657 portid); 2658 2659 rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST); 2660 } 2661 2662 /* init one TX queue per couple (lcore,port) */ 2663 queueid = 0; 2664 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2665 if (rte_lcore_is_enabled(lcore_id) == 0) 2666 continue; 2667 2668 if (queueid >= dev_txq_num) 2669 continue; 2670 2671 if (numa_on) 2672 socketid = \ 2673 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2674 else 2675 socketid = 0; 2676 2677 printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); 2678 fflush(stdout); 2679 2680 txconf = &dev_info.default_txconf; 2681 txconf->offloads = local_port_conf.txmode.offloads; 2682 ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, 2683 socketid, txconf); 2684 if (ret < 0) 2685 rte_exit(EXIT_FAILURE, 2686 
"rte_eth_tx_queue_setup: err=%d, " 2687 "port=%d\n", ret, portid); 2688 2689 qconf = &lcore_conf[lcore_id]; 2690 qconf->tx_queue_id[portid] = queueid; 2691 queueid++; 2692 2693 qconf->tx_port_id[qconf->n_tx_port] = portid; 2694 qconf->n_tx_port++; 2695 } 2696 printf("\n"); 2697 } 2698 2699 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2700 if (rte_lcore_is_enabled(lcore_id) == 0) 2701 continue; 2702 2703 if (app_mode == APP_MODE_LEGACY) { 2704 /* init timer structures for each enabled lcore */ 2705 rte_timer_init(&power_timers[lcore_id]); 2706 hz = rte_get_timer_hz(); 2707 rte_timer_reset(&power_timers[lcore_id], 2708 hz/TIMER_NUMBER_PER_SECOND, 2709 SINGLE, lcore_id, 2710 power_timer_cb, NULL); 2711 } 2712 qconf = &lcore_conf[lcore_id]; 2713 printf("\nInitializing rx queues on lcore %u ... ", lcore_id ); 2714 fflush(stdout); 2715 2716 /* init RX queues */ 2717 for(queue = 0; queue < qconf->n_rx_queue; ++queue) { 2718 struct rte_eth_rxconf rxq_conf; 2719 2720 portid = qconf->rx_queue_list[queue].port_id; 2721 queueid = qconf->rx_queue_list[queue].queue_id; 2722 2723 if (numa_on) 2724 socketid = \ 2725 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2726 else 2727 socketid = 0; 2728 2729 printf("rxq=%d,%d,%d ", portid, queueid, socketid); 2730 fflush(stdout); 2731 2732 ret = rte_eth_dev_info_get(portid, &dev_info); 2733 if (ret != 0) 2734 rte_exit(EXIT_FAILURE, 2735 "Error during getting device (port %u) info: %s\n", 2736 portid, strerror(-ret)); 2737 2738 rxq_conf = dev_info.default_rxconf; 2739 rxq_conf.offloads = port_conf.rxmode.offloads; 2740 ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, 2741 socketid, &rxq_conf, 2742 pktmbuf_pool[socketid]); 2743 if (ret < 0) 2744 rte_exit(EXIT_FAILURE, 2745 "rte_eth_rx_queue_setup: err=%d, " 2746 "port=%d\n", ret, portid); 2747 2748 if (parse_ptype) { 2749 if (add_cb_parse_ptype(portid, queueid) < 0) 2750 rte_exit(EXIT_FAILURE, 2751 "Fail to add ptype cb\n"); 2752 } 2753 2754 if (app_mode == APP_MODE_PMD_MGMT && !baseline_enabled) { 2755 /* Set power_pmd_mgmt configs passed by user */ 2756 rte_power_pmd_mgmt_set_emptypoll_max(max_empty_polls); 2757 ret = rte_power_pmd_mgmt_set_pause_duration(pause_duration); 2758 if (ret < 0) 2759 rte_exit(EXIT_FAILURE, 2760 "Error setting pause_duration: err=%d, lcore=%d\n", 2761 ret, lcore_id); 2762 2763 ret = rte_power_pmd_mgmt_set_scaling_freq_min(lcore_id, 2764 scale_freq_min); 2765 if (ret < 0) 2766 rte_exit(EXIT_FAILURE, 2767 "Error setting scaling freq min: err=%d, lcore=%d\n", 2768 ret, lcore_id); 2769 2770 ret = rte_power_pmd_mgmt_set_scaling_freq_max(lcore_id, 2771 scale_freq_max); 2772 if (ret < 0) 2773 rte_exit(EXIT_FAILURE, 2774 "Error setting scaling freq max: err=%d, lcore %d\n", 2775 ret, lcore_id); 2776 2777 ret = rte_power_ethdev_pmgmt_queue_enable( 2778 lcore_id, portid, queueid, 2779 pmgmt_type); 2780 if (ret < 0) 2781 rte_exit(EXIT_FAILURE, 2782 "rte_power_ethdev_pmgmt_queue_enable: err=%d, port=%d\n", 2783 ret, portid); 2784 } 2785 } 2786 } 2787 /* >8 End of power library initialization. */ 2788 2789 printf("\n"); 2790 2791 /* start ports */ 2792 RTE_ETH_FOREACH_DEV(portid) { 2793 if ((enabled_port_mask & (1 << portid)) == 0) { 2794 continue; 2795 } 2796 /* Start device */ 2797 ret = rte_eth_dev_start(portid); 2798 if (ret < 0) 2799 rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, " 2800 "port=%d\n", ret, portid); 2801 /* 2802 * If enabled, put device in promiscuous mode. 
		 * This allows IO forwarding mode to forward packets
		 * to itself through 2 cross-connected ports of the
		 * target machine.
		 */
		if (promiscuous_on) {
			ret = rte_eth_promiscuous_enable(portid);
			if (ret != 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_promiscuous_enable: err=%s, port=%u\n",
					rte_strerror(-ret), portid);
		}
		/* initialize spinlock for each port */
		rte_spinlock_init(&(locks[portid]));

		if (!parse_ptype)
			if (!check_ptype(portid))
				rte_exit(EXIT_FAILURE,
					"PMD cannot provide needed ptypes\n");
	}

	check_all_ports_link_status(enabled_port_mask);

	/* launch per-lcore init on every lcore */
	if (app_mode == APP_MODE_LEGACY) {
		rte_eal_mp_remote_launch(main_legacy_loop, NULL, CALL_MAIN);
	} else if (app_mode == APP_MODE_TELEMETRY) {
		unsigned int i;

		/* Init metrics library */
		rte_metrics_init(rte_socket_id());
		/* Register stats with metrics library */
		for (i = 0; i < NUM_TELSTATS; i++)
			ptr_strings[i] = telstats_strings[i].name;

		ret = rte_metrics_reg_names(ptr_strings, NUM_TELSTATS);
		if (ret >= 0)
			telstats_index = ret;
		else
			rte_exit(EXIT_FAILURE, "failed to register metrics names\n");

		RTE_LCORE_FOREACH_WORKER(lcore_id) {
			rte_spinlock_init(&stats[lcore_id].telemetry_lock);
		}
		rte_timer_init(&telemetry_timer);
		rte_telemetry_register_cmd("/l3fwd-power/stats",
				handle_app_stats,
				"Returns global power stats. Parameters: None");
		rte_eal_mp_remote_launch(main_telemetry_loop, NULL,
						SKIP_MAIN);
	} else if (app_mode == APP_MODE_INTERRUPT) {
		rte_eal_mp_remote_launch(main_intr_loop, NULL, CALL_MAIN);
	} else if (app_mode == APP_MODE_PMD_MGMT) {
		/* reuse telemetry loop for PMD power management mode */
		rte_eal_mp_remote_launch(main_telemetry_loop, NULL, CALL_MAIN);
	}

	if (app_mode == APP_MODE_TELEMETRY)
		launch_timer(rte_lcore_id());

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	if (app_mode == APP_MODE_PMD_MGMT) {
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;
			qconf = &lcore_conf[lcore_id];
			for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
				portid = qconf->rx_queue_list[queue].port_id;
				queueid = qconf->rx_queue_list[queue].queue_id;

				rte_power_ethdev_pmgmt_queue_disable(lcore_id,
						portid, queueid);
			}
		}
	}

	RTE_ETH_FOREACH_DEV(portid)
	{
		if ((enabled_port_mask & (1 << portid)) == 0)
			continue;

		ret = rte_eth_dev_stop(portid);
		if (ret != 0)
			RTE_LOG(ERR, L3FWD_POWER, "rte_eth_dev_stop: err=%d, port=%u\n",
				ret, portid);

		rte_eth_dev_close(portid);
	}

	if ((app_mode == APP_MODE_LEGACY) && deinit_power_library())
		rte_exit(EXIT_FAILURE, "deinit_power_library failed\n");

	if (rte_eal_cleanup() < 0)
		RTE_LOG(ERR, L3FWD_POWER, "EAL cleanup failed\n");

	return 0;
}