1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdint.h> 8 #include <inttypes.h> 9 #include <sys/types.h> 10 #include <string.h> 11 #include <sys/queue.h> 12 #include <stdarg.h> 13 #include <errno.h> 14 #include <getopt.h> 15 #include <unistd.h> 16 #include <signal.h> 17 #include <math.h> 18 19 #include <rte_common.h> 20 #include <rte_byteorder.h> 21 #include <rte_log.h> 22 #include <rte_malloc.h> 23 #include <rte_memory.h> 24 #include <rte_memcpy.h> 25 #include <rte_eal.h> 26 #include <rte_launch.h> 27 #include <rte_cycles.h> 28 #include <rte_prefetch.h> 29 #include <rte_lcore.h> 30 #include <rte_per_lcore.h> 31 #include <rte_branch_prediction.h> 32 #include <rte_interrupts.h> 33 #include <rte_random.h> 34 #include <rte_debug.h> 35 #include <rte_ether.h> 36 #include <rte_ethdev.h> 37 #include <rte_mempool.h> 38 #include <rte_mbuf.h> 39 #include <rte_ip.h> 40 #include <rte_tcp.h> 41 #include <rte_udp.h> 42 #include <rte_string_fns.h> 43 #include <rte_timer.h> 44 #include <rte_power.h> 45 #include <rte_spinlock.h> 46 #include <rte_metrics.h> 47 #include <rte_telemetry.h> 48 #include <rte_power_pmd_mgmt.h> 49 #include <rte_power_uncore.h> 50 51 #include "perf_core.h" 52 #include "main.h" 53 54 RTE_LOG_REGISTER(l3fwd_power_logtype, l3fwd.power, INFO); 55 #define RTE_LOGTYPE_L3FWD_POWER l3fwd_power_logtype 56 57 #define MAX_PKT_BURST 32 58 59 #define MIN_ZERO_POLL_COUNT 10 60 61 /* 100 ms interval */ 62 #define TIMER_NUMBER_PER_SECOND 10 63 /* (10ms) */ 64 #define INTERVALS_PER_SECOND 100 65 /* 100000 us */ 66 #define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND) 67 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25 68 69 #define APP_LOOKUP_EXACT_MATCH 0 70 #define APP_LOOKUP_LPM 1 71 #define DO_RFC_1812_CHECKS 72 73 #ifndef APP_LOOKUP_METHOD 74 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 75 #endif 76 77 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 78 #include <rte_hash.h> 79 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 80 #include <rte_lpm.h> 81 #else 82 #error "APP_LOOKUP_METHOD set to incorrect value" 83 #endif 84 85 #ifndef IPv6_BYTES 86 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ 87 "%02x%02x:%02x%02x:%02x%02x:%02x%02x" 88 #define IPv6_BYTES(addr) \ 89 addr[0], addr[1], addr[2], addr[3], \ 90 addr[4], addr[5], addr[6], addr[7], \ 91 addr[8], addr[9], addr[10], addr[11],\ 92 addr[12], addr[13],addr[14], addr[15] 93 #endif 94 95 #define MAX_JUMBO_PKT_LEN 9600 96 97 #define IPV6_ADDR_LEN 16 98 99 #define MEMPOOL_CACHE_SIZE 256 100 101 /* 102 * This expression is used to calculate the number of mbufs needed depending on 103 * user input, taking into account memory for rx and tx hardware rings, cache 104 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 105 * NB_MBUF never goes below a minimum value of 8192. 106 */ 107 108 #define NB_MBUF RTE_MAX ( \ 109 (nb_ports*nb_rx_queue*nb_rxd + \ 110 nb_ports*nb_lcores*MAX_PKT_BURST + \ 111 nb_ports*n_tx_queue*nb_txd + \ 112 nb_lcores*MEMPOOL_CACHE_SIZE), \ 113 (unsigned)8192) 114 115 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 116 117 #define NB_SOCKETS 8 118 119 /* Configure how many packets ahead to prefetch, when reading packets */ 120 #define PREFETCH_OFFSET 3 121 122 /* 123 * Configurable number of RX/TX ring descriptors 124 */ 125 #define RX_DESC_DEFAULT 1024 126 #define TX_DESC_DEFAULT 1024 127 128 #define NUM_TELSTATS RTE_DIM(telstats_strings) 129 130 static uint16_t nb_rxd = RX_DESC_DEFAULT; 131 static uint16_t nb_txd = TX_DESC_DEFAULT; 132 133 /* ethernet addresses of ports */ 134 static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 135 136 /* ethernet addresses of ports */ 137 static rte_spinlock_t locks[RTE_MAX_ETHPORTS]; 138 139 /* mask of enabled ports */ 140 static uint32_t enabled_port_mask = 0; 141 /* Ports set in promiscuous mode off by default. */ 142 static int promiscuous_on = 0; 143 /* NUMA is enabled by default. */ 144 static int numa_on = 1; 145 volatile bool quit_signal; 146 /* timer to update telemetry every 500ms */ 147 static struct rte_timer telemetry_timer; 148 149 /* stats index returned by metrics lib */ 150 int telstats_index; 151 152 /* flag to check if uncore option enabled */ 153 int enabled_uncore = -1; 154 155 struct telstats_name { 156 char name[RTE_ETH_XSTATS_NAME_SIZE]; 157 }; 158 159 /* telemetry stats to be reported */ 160 const struct telstats_name telstats_strings[] = { 161 {"empty_poll"}, 162 {"full_poll"}, 163 {"busy_percent"} 164 }; 165 166 /* core busyness in percentage */ 167 enum busy_rate { 168 ZERO = 0, 169 PARTIAL = 50, 170 FULL = 100 171 }; 172 173 enum uncore_choice { 174 UNCORE_MIN = 0, 175 UNCORE_MAX = 1, 176 UNCORE_IDX = 2 177 }; 178 179 /* reference poll count to measure core busyness */ 180 #define DEFAULT_COUNT 10000 181 /* 182 * reference CYCLES to be used to 183 * measure core busyness based on poll count 184 */ 185 #define MIN_CYCLES 1500000ULL 186 #define MAX_CYCLES 22000000ULL 187 188 /* (500ms) */ 189 #define TELEMETRY_INTERVALS_PER_SEC 2 190 191 static int parse_ptype; /**< Parse packet type using rx callback, and */ 192 /**< disabled by default */ 193 194 enum appmode { 195 APP_MODE_DEFAULT = 0, 196 APP_MODE_LEGACY, 197 APP_MODE_TELEMETRY, 198 APP_MODE_INTERRUPT, 199 APP_MODE_PMD_MGMT 200 }; 201 202 enum appmode app_mode; 203 204 static enum rte_power_pmd_mgmt_type pmgmt_type; 205 bool baseline_enabled; 206 207 enum freq_scale_hint_t 208 { 209 FREQ_LOWER = -1, 210 FREQ_CURRENT = 0, 211 FREQ_HIGHER = 1, 212 FREQ_HIGHEST = 2 213 }; 214 215 struct lcore_rx_queue { 216 uint16_t port_id; 217 uint8_t queue_id; 218 enum freq_scale_hint_t freq_up_hint; 219 uint32_t zero_rx_packet_count; 220 uint32_t idle_hint; 221 } __rte_cache_aligned; 222 223 #define MAX_RX_QUEUE_PER_LCORE 16 224 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 225 #define MAX_RX_QUEUE_PER_PORT 128 226 227 #define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 228 229 230 struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; 231 static struct lcore_params lcore_params_array_default[] = { 232 {0, 0, 2}, 233 {0, 1, 2}, 234 {0, 2, 2}, 235 {1, 0, 2}, 236 {1, 1, 2}, 237 {1, 2, 2}, 238 {2, 0, 2}, 239 {3, 0, 3}, 240 {3, 1, 3}, 241 }; 242 243 struct lcore_params *lcore_params = lcore_params_array_default; 244 uint16_t nb_lcore_params = RTE_DIM(lcore_params_array_default); 245 246 static struct rte_eth_conf port_conf = { 247 .rxmode = { 248 .mq_mode = RTE_ETH_MQ_RX_RSS, 249 .offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM, 250 }, 251 .rx_adv_conf = { 252 .rss_conf = { 253 .rss_key = NULL, 254 .rss_hf = RTE_ETH_RSS_UDP, 255 }, 256 }, 257 .txmode = { 258 .mq_mode = RTE_ETH_MQ_TX_NONE, 259 } 260 }; 261 262 static uint32_t max_pkt_len; 263 static uint32_t max_empty_polls = 512; 264 static uint32_t pause_duration = 1; 265 static uint32_t scale_freq_min; 266 static uint32_t scale_freq_max; 267 268 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; 269 270 271 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 272 273 #ifdef RTE_ARCH_X86 274 #include <rte_hash_crc.h> 275 #define DEFAULT_HASH_FUNC rte_hash_crc 276 #else 277 #include <rte_jhash.h> 278 #define DEFAULT_HASH_FUNC rte_jhash 279 #endif 280 281 struct ipv4_5tuple { 282 uint32_t ip_dst; 283 uint32_t ip_src; 284 uint16_t port_dst; 285 uint16_t port_src; 286 uint8_t proto; 287 } __rte_packed; 288 289 struct ipv6_5tuple { 290 uint8_t ip_dst[IPV6_ADDR_LEN]; 291 uint8_t ip_src[IPV6_ADDR_LEN]; 292 uint16_t port_dst; 293 uint16_t port_src; 294 uint8_t proto; 295 } __rte_packed; 296 297 struct ipv4_l3fwd_route { 298 struct ipv4_5tuple key; 299 uint8_t if_out; 300 }; 301 302 struct ipv6_l3fwd_route { 303 struct ipv6_5tuple key; 304 uint8_t if_out; 305 }; 306 307 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 308 {{RTE_IPV4(100,10,0,1), RTE_IPV4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, 309 {{RTE_IPV4(100,20,0,2), RTE_IPV4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, 310 {{RTE_IPV4(100,30,0,3), RTE_IPV4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, 311 {{RTE_IPV4(100,40,0,4), RTE_IPV4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, 312 }; 313 314 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 315 { 316 { 317 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 318 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, 319 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 320 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, 321 1, 10, IPPROTO_UDP 322 }, 4 323 }, 324 }; 325 326 typedef struct rte_hash lookup_struct_t; 327 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 328 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 329 330 #define L3FWD_HASH_ENTRIES 1024 331 332 static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 333 static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 334 #endif 335 336 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 337 struct ipv4_l3fwd_route { 338 uint32_t ip; 339 uint8_t depth; 340 uint8_t if_out; 341 }; 342 343 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 344 {RTE_IPV4(1,1,1,0), 24, 0}, 345 {RTE_IPV4(2,1,1,0), 24, 1}, 346 {RTE_IPV4(3,1,1,0), 24, 2}, 347 {RTE_IPV4(4,1,1,0), 24, 3}, 348 {RTE_IPV4(5,1,1,0), 24, 4}, 349 {RTE_IPV4(6,1,1,0), 24, 5}, 350 {RTE_IPV4(7,1,1,0), 24, 6}, 351 {RTE_IPV4(8,1,1,0), 24, 7}, 352 }; 353 354 #define IPV4_L3FWD_LPM_MAX_RULES 1024 355 356 typedef struct rte_lpm lookup_struct_t; 357 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 358 #endif 359 360 struct lcore_conf { 361 uint16_t n_rx_queue; 362 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 363 uint16_t n_tx_port; 364 uint16_t tx_port_id[RTE_MAX_ETHPORTS]; 365 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 366 struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; 367 lookup_struct_t * ipv4_lookup_struct; 368 lookup_struct_t * ipv6_lookup_struct; 369 } __rte_cache_aligned; 370 371 struct lcore_stats { 372 /* total sleep time in ms since last frequency scaling down */ 373 uint32_t sleep_time; 374 /* number of long sleep recently */ 375 uint32_t nb_long_sleep; 376 /* freq. scaling up trend */ 377 uint32_t trend; 378 /* total packet processed recently */ 379 uint64_t nb_rx_processed; 380 /* total iterations looped recently */ 381 uint64_t nb_iteration_looped; 382 /* 383 * Represents empty and non empty polls 384 * of rte_eth_rx_burst(); 385 * ep_nep[0] holds non empty polls 386 * i.e. 0 < nb_rx <= MAX_BURST 387 * ep_nep[1] holds empty polls. 388 * i.e. nb_rx == 0 389 */ 390 uint64_t ep_nep[2]; 391 /* 392 * Represents full and empty+partial 393 * polls of rte_eth_rx_burst(); 394 * ep_nep[0] holds empty+partial polls. 395 * i.e. 0 <= nb_rx < MAX_BURST 396 * ep_nep[1] holds full polls 397 * i.e. nb_rx == MAX_BURST 398 */ 399 uint64_t fp_nfp[2]; 400 enum busy_rate br; 401 rte_spinlock_t telemetry_lock; 402 } __rte_cache_aligned; 403 404 static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned; 405 static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned; 406 static struct rte_timer power_timers[RTE_MAX_LCORE]; 407 408 static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count); 409 static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \ 410 unsigned int lcore_id, uint16_t port_id, uint16_t queue_id); 411 412 static int is_done(void) 413 { 414 return quit_signal; 415 } 416 417 /* exit signal handler */ 418 static void 419 signal_exit_now(int sigtype) 420 { 421 422 if (sigtype == SIGINT) 423 quit_signal = true; 424 425 } 426 427 /* Frequency scale down timer callback */ 428 static void 429 power_timer_cb(__rte_unused struct rte_timer *tim, 430 __rte_unused void *arg) 431 { 432 uint64_t hz; 433 float sleep_time_ratio; 434 unsigned lcore_id = rte_lcore_id(); 435 436 /* accumulate total execution time in us when callback is invoked */ 437 sleep_time_ratio = (float)(stats[lcore_id].sleep_time) / 438 (float)SCALING_PERIOD; 439 /** 440 * check whether need to scale down frequency a step if it sleep a lot. 441 */ 442 if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) { 443 if (rte_power_freq_down) 444 rte_power_freq_down(lcore_id); 445 } 446 else if ( (unsigned)(stats[lcore_id].nb_rx_processed / 447 stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) { 448 /** 449 * scale down a step if average packet per iteration less 450 * than expectation. 451 */ 452 if (rte_power_freq_down) 453 rte_power_freq_down(lcore_id); 454 } 455 456 /** 457 * initialize another timer according to current frequency to ensure 458 * timer interval is relatively fixed. 459 */ 460 hz = rte_get_timer_hz(); 461 rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND, 462 SINGLE, lcore_id, power_timer_cb, NULL); 463 464 stats[lcore_id].nb_rx_processed = 0; 465 stats[lcore_id].nb_iteration_looped = 0; 466 467 stats[lcore_id].sleep_time = 0; 468 } 469 470 /* Enqueue a single packet, and send burst if queue is filled */ 471 static inline int 472 send_single_packet(struct rte_mbuf *m, uint16_t port) 473 { 474 uint32_t lcore_id; 475 struct lcore_conf *qconf; 476 477 lcore_id = rte_lcore_id(); 478 qconf = &lcore_conf[lcore_id]; 479 480 rte_eth_tx_buffer(port, qconf->tx_queue_id[port], 481 qconf->tx_buffer[port], m); 482 483 return 0; 484 } 485 486 #ifdef DO_RFC_1812_CHECKS 487 static inline int 488 is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len) 489 { 490 /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ 491 /* 492 * 1. The packet length reported by the Link Layer must be large 493 * enough to hold the minimum length legal IP datagram (20 bytes). 494 */ 495 if (link_len < sizeof(struct rte_ipv4_hdr)) 496 return -1; 497 498 /* 2. The IP checksum must be correct. */ 499 /* if this is not checked in H/W, check it. */ 500 if ((port_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM) == 0) { 501 uint16_t actual_cksum, expected_cksum; 502 actual_cksum = pkt->hdr_checksum; 503 pkt->hdr_checksum = 0; 504 expected_cksum = rte_ipv4_cksum(pkt); 505 if (actual_cksum != expected_cksum) 506 return -2; 507 } 508 509 /* 510 * 3. The IP version number must be 4. If the version number is not 4 511 * then the packet may be another version of IP, such as IPng or 512 * ST-II. 513 */ 514 if (((pkt->version_ihl) >> 4) != 4) 515 return -3; 516 /* 517 * 4. The IP header length field must be large enough to hold the 518 * minimum length legal IP datagram (20 bytes = 5 words). 519 */ 520 if ((pkt->version_ihl & 0xf) < 5) 521 return -4; 522 523 /* 524 * 5. The IP total length field must be large enough to hold the IP 525 * datagram header, whose length is specified in the IP header length 526 * field. 527 */ 528 if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr)) 529 return -5; 530 531 return 0; 532 } 533 #endif 534 535 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 536 static void 537 print_ipv4_key(struct ipv4_5tuple key) 538 { 539 printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, " 540 "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src, 541 key.port_dst, key.port_src, key.proto); 542 } 543 static void 544 print_ipv6_key(struct ipv6_5tuple key) 545 { 546 printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", " 547 "port dst = %d, port src = %d, proto = %d\n", 548 IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src), 549 key.port_dst, key.port_src, key.proto); 550 } 551 552 static inline uint16_t 553 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 554 lookup_struct_t * ipv4_l3fwd_lookup_struct) 555 { 556 struct ipv4_5tuple key; 557 struct rte_tcp_hdr *tcp; 558 struct rte_udp_hdr *udp; 559 int ret = 0; 560 561 key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); 562 key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); 563 key.proto = ipv4_hdr->next_proto_id; 564 565 switch (ipv4_hdr->next_proto_id) { 566 case IPPROTO_TCP: 567 tcp = (struct rte_tcp_hdr *)((unsigned char *)ipv4_hdr + 568 sizeof(struct rte_ipv4_hdr)); 569 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 570 key.port_src = rte_be_to_cpu_16(tcp->src_port); 571 break; 572 573 case IPPROTO_UDP: 574 udp = (struct rte_udp_hdr *)((unsigned char *)ipv4_hdr + 575 sizeof(struct rte_ipv4_hdr)); 576 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 577 key.port_src = rte_be_to_cpu_16(udp->src_port); 578 break; 579 580 default: 581 key.port_dst = 0; 582 key.port_src = 0; 583 break; 584 } 585 586 /* Find destination port */ 587 ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); 588 return ((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]); 589 } 590 591 static inline uint16_t 592 get_ipv6_dst_port(struct rte_ipv6_hdr *ipv6_hdr, uint16_t portid, 593 lookup_struct_t *ipv6_l3fwd_lookup_struct) 594 { 595 struct ipv6_5tuple key; 596 struct rte_tcp_hdr *tcp; 597 struct rte_udp_hdr *udp; 598 int ret = 0; 599 600 memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); 601 memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); 602 603 key.proto = ipv6_hdr->proto; 604 605 switch (ipv6_hdr->proto) { 606 case IPPROTO_TCP: 607 tcp = (struct rte_tcp_hdr *)((unsigned char *) ipv6_hdr + 608 sizeof(struct rte_ipv6_hdr)); 609 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 610 key.port_src = rte_be_to_cpu_16(tcp->src_port); 611 break; 612 613 case IPPROTO_UDP: 614 udp = (struct rte_udp_hdr *)((unsigned char *) ipv6_hdr + 615 sizeof(struct rte_ipv6_hdr)); 616 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 617 key.port_src = rte_be_to_cpu_16(udp->src_port); 618 break; 619 620 default: 621 key.port_dst = 0; 622 key.port_src = 0; 623 break; 624 } 625 626 /* Find destination port */ 627 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 628 return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); 629 } 630 #endif 631 632 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 633 static inline uint16_t 634 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 635 lookup_struct_t *ipv4_l3fwd_lookup_struct) 636 { 637 uint32_t next_hop; 638 639 return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, 640 rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? 641 next_hop : portid); 642 } 643 #endif 644 645 static inline void 646 parse_ptype_one(struct rte_mbuf *m) 647 { 648 struct rte_ether_hdr *eth_hdr; 649 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 650 uint16_t ether_type; 651 652 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 653 ether_type = eth_hdr->ether_type; 654 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) 655 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 656 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) 657 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 658 659 m->packet_type = packet_type; 660 } 661 662 static uint16_t 663 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused, 664 struct rte_mbuf *pkts[], uint16_t nb_pkts, 665 uint16_t max_pkts __rte_unused, 666 void *user_param __rte_unused) 667 { 668 unsigned int i; 669 670 for (i = 0; i < nb_pkts; ++i) 671 parse_ptype_one(pkts[i]); 672 673 return nb_pkts; 674 } 675 676 static int 677 add_cb_parse_ptype(uint16_t portid, uint16_t queueid) 678 { 679 printf("Port %d: softly parse packet type info\n", portid); 680 if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL)) 681 return 0; 682 683 printf("Failed to add rx callback: port=%d\n", portid); 684 return -1; 685 } 686 687 static inline void 688 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid, 689 struct lcore_conf *qconf) 690 { 691 struct rte_ether_hdr *eth_hdr; 692 struct rte_ipv4_hdr *ipv4_hdr; 693 void *d_addr_bytes; 694 uint16_t dst_port; 695 696 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 697 698 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 699 /* Handle IPv4 headers.*/ 700 ipv4_hdr = 701 rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 702 sizeof(struct rte_ether_hdr)); 703 704 #ifdef DO_RFC_1812_CHECKS 705 /* Check to make sure the packet is valid (RFC1812) */ 706 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { 707 rte_pktmbuf_free(m); 708 return; 709 } 710 #endif 711 712 dst_port = get_ipv4_dst_port(ipv4_hdr, portid, 713 qconf->ipv4_lookup_struct); 714 if (dst_port >= RTE_MAX_ETHPORTS || 715 (enabled_port_mask & 1 << dst_port) == 0) 716 dst_port = portid; 717 718 /* 02:00:00:00:00:xx */ 719 d_addr_bytes = ð_hdr->dst_addr.addr_bytes[0]; 720 *((uint64_t *)d_addr_bytes) = 721 0x000000000002 + ((uint64_t)dst_port << 40); 722 723 #ifdef DO_RFC_1812_CHECKS 724 /* Update time to live and header checksum */ 725 --(ipv4_hdr->time_to_live); 726 ++(ipv4_hdr->hdr_checksum); 727 #endif 728 729 /* src addr */ 730 rte_ether_addr_copy(&ports_eth_addr[dst_port], 731 ð_hdr->src_addr); 732 733 send_single_packet(m, dst_port); 734 } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { 735 /* Handle IPv6 headers.*/ 736 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 737 struct rte_ipv6_hdr *ipv6_hdr; 738 739 ipv6_hdr = 740 rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 741 sizeof(struct rte_ether_hdr)); 742 743 dst_port = get_ipv6_dst_port(ipv6_hdr, portid, 744 qconf->ipv6_lookup_struct); 745 746 if (dst_port >= RTE_MAX_ETHPORTS || 747 (enabled_port_mask & 1 << dst_port) == 0) 748 dst_port = portid; 749 750 /* 02:00:00:00:00:xx */ 751 d_addr_bytes = ð_hdr->dst_addr.addr_bytes[0]; 752 *((uint64_t *)d_addr_bytes) = 753 0x000000000002 + ((uint64_t)dst_port << 40); 754 755 /* src addr */ 756 rte_ether_addr_copy(&ports_eth_addr[dst_port], 757 ð_hdr->src_addr); 758 759 send_single_packet(m, dst_port); 760 #else 761 /* We don't currently handle IPv6 packets in LPM mode. */ 762 rte_pktmbuf_free(m); 763 #endif 764 } else 765 rte_pktmbuf_free(m); 766 767 } 768 769 #define MINIMUM_SLEEP_TIME 1 770 #define SUSPEND_THRESHOLD 300 771 772 static inline uint32_t 773 power_idle_heuristic(uint32_t zero_rx_packet_count) 774 { 775 /* If zero count is less than 100, sleep 1us */ 776 if (zero_rx_packet_count < SUSPEND_THRESHOLD) 777 return MINIMUM_SLEEP_TIME; 778 /* If zero count is less than 1000, sleep 100 us which is the 779 minimum latency switching from C3/C6 to C0 780 */ 781 else 782 return SUSPEND_THRESHOLD; 783 } 784 785 static inline enum freq_scale_hint_t 786 power_freq_scaleup_heuristic(unsigned lcore_id, 787 uint16_t port_id, 788 uint16_t queue_id) 789 { 790 uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id); 791 /** 792 * HW Rx queue size is 128 by default, Rx burst read at maximum 32 entries 793 * per iteration 794 */ 795 #define FREQ_GEAR1_RX_PACKET_THRESHOLD MAX_PKT_BURST 796 #define FREQ_GEAR2_RX_PACKET_THRESHOLD (MAX_PKT_BURST*2) 797 #define FREQ_GEAR3_RX_PACKET_THRESHOLD (MAX_PKT_BURST*3) 798 #define FREQ_UP_TREND1_ACC 1 799 #define FREQ_UP_TREND2_ACC 100 800 #define FREQ_UP_THRESHOLD 10000 801 802 if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) { 803 stats[lcore_id].trend = 0; 804 return FREQ_HIGHEST; 805 } else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD)) 806 stats[lcore_id].trend += FREQ_UP_TREND2_ACC; 807 else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD)) 808 stats[lcore_id].trend += FREQ_UP_TREND1_ACC; 809 810 if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) { 811 stats[lcore_id].trend = 0; 812 return FREQ_HIGHER; 813 } 814 815 return FREQ_CURRENT; 816 } 817 818 /** 819 * force polling thread sleep until one-shot rx interrupt triggers 820 * @param port_id 821 * Port id. 822 * @param queue_id 823 * Rx queue id. 824 * @return 825 * 0 on success 826 */ 827 static int 828 sleep_until_rx_interrupt(int num, int lcore) 829 { 830 /* 831 * we want to track when we are woken up by traffic so that we can go 832 * back to sleep again without log spamming. Avoid cache line sharing 833 * to prevent threads stepping on each others' toes. 834 */ 835 static struct { 836 bool wakeup; 837 } __rte_cache_aligned status[RTE_MAX_LCORE]; 838 struct rte_epoll_event event[num]; 839 int n, i; 840 uint16_t port_id; 841 uint8_t queue_id; 842 void *data; 843 844 if (status[lcore].wakeup) { 845 RTE_LOG(INFO, L3FWD_POWER, 846 "lcore %u sleeps until interrupt triggers\n", 847 rte_lcore_id()); 848 } 849 850 n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, 10); 851 for (i = 0; i < n; i++) { 852 data = event[i].epdata.data; 853 port_id = ((uintptr_t)data) >> CHAR_BIT; 854 queue_id = ((uintptr_t)data) & 855 RTE_LEN2MASK(CHAR_BIT, uint8_t); 856 RTE_LOG(INFO, L3FWD_POWER, 857 "lcore %u is waked up from rx interrupt on" 858 " port %d queue %d\n", 859 rte_lcore_id(), port_id, queue_id); 860 } 861 status[lcore].wakeup = n != 0; 862 863 return 0; 864 } 865 866 static void turn_on_off_intr(struct lcore_conf *qconf, bool on) 867 { 868 int i; 869 struct lcore_rx_queue *rx_queue; 870 uint8_t queue_id; 871 uint16_t port_id; 872 873 for (i = 0; i < qconf->n_rx_queue; ++i) { 874 rx_queue = &(qconf->rx_queue_list[i]); 875 port_id = rx_queue->port_id; 876 queue_id = rx_queue->queue_id; 877 878 rte_spinlock_lock(&(locks[port_id])); 879 if (on) 880 rte_eth_dev_rx_intr_enable(port_id, queue_id); 881 else 882 rte_eth_dev_rx_intr_disable(port_id, queue_id); 883 rte_spinlock_unlock(&(locks[port_id])); 884 } 885 } 886 887 static int event_register(struct lcore_conf *qconf) 888 { 889 struct lcore_rx_queue *rx_queue; 890 uint8_t queueid; 891 uint16_t portid; 892 uint32_t data; 893 int ret; 894 int i; 895 896 for (i = 0; i < qconf->n_rx_queue; ++i) { 897 rx_queue = &(qconf->rx_queue_list[i]); 898 portid = rx_queue->port_id; 899 queueid = rx_queue->queue_id; 900 data = portid << CHAR_BIT | queueid; 901 902 ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid, 903 RTE_EPOLL_PER_THREAD, 904 RTE_INTR_EVENT_ADD, 905 (void *)((uintptr_t)data)); 906 if (ret) 907 return ret; 908 } 909 910 return 0; 911 } 912 913 /* Main processing loop. 8< */ 914 static int main_intr_loop(__rte_unused void *dummy) 915 { 916 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 917 unsigned int lcore_id; 918 uint64_t prev_tsc, diff_tsc, cur_tsc; 919 int i, j, nb_rx; 920 uint8_t queueid; 921 uint16_t portid; 922 struct lcore_conf *qconf; 923 struct lcore_rx_queue *rx_queue; 924 uint32_t lcore_rx_idle_count = 0; 925 uint32_t lcore_idle_hint = 0; 926 int intr_en = 0; 927 928 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 929 US_PER_S * BURST_TX_DRAIN_US; 930 931 prev_tsc = 0; 932 933 lcore_id = rte_lcore_id(); 934 qconf = &lcore_conf[lcore_id]; 935 936 if (qconf->n_rx_queue == 0) { 937 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 938 lcore_id); 939 return 0; 940 } 941 942 RTE_LOG(INFO, L3FWD_POWER, "entering main interrupt loop on lcore %u\n", 943 lcore_id); 944 945 for (i = 0; i < qconf->n_rx_queue; i++) { 946 portid = qconf->rx_queue_list[i].port_id; 947 queueid = qconf->rx_queue_list[i].queue_id; 948 RTE_LOG(INFO, L3FWD_POWER, 949 " -- lcoreid=%u portid=%u rxqueueid=%hhu\n", 950 lcore_id, portid, queueid); 951 } 952 953 /* add into event wait list */ 954 if (event_register(qconf) == 0) 955 intr_en = 1; 956 else 957 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 958 959 while (!is_done()) { 960 stats[lcore_id].nb_iteration_looped++; 961 962 cur_tsc = rte_rdtsc(); 963 964 /* 965 * TX burst queue drain 966 */ 967 diff_tsc = cur_tsc - prev_tsc; 968 if (unlikely(diff_tsc > drain_tsc)) { 969 for (i = 0; i < qconf->n_tx_port; ++i) { 970 portid = qconf->tx_port_id[i]; 971 rte_eth_tx_buffer_flush(portid, 972 qconf->tx_queue_id[portid], 973 qconf->tx_buffer[portid]); 974 } 975 prev_tsc = cur_tsc; 976 } 977 978 start_rx: 979 /* 980 * Read packet from RX queues 981 */ 982 lcore_rx_idle_count = 0; 983 for (i = 0; i < qconf->n_rx_queue; ++i) { 984 rx_queue = &(qconf->rx_queue_list[i]); 985 rx_queue->idle_hint = 0; 986 portid = rx_queue->port_id; 987 queueid = rx_queue->queue_id; 988 989 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 990 MAX_PKT_BURST); 991 992 stats[lcore_id].nb_rx_processed += nb_rx; 993 if (unlikely(nb_rx == 0)) { 994 /** 995 * no packet received from rx queue, try to 996 * sleep for a while forcing CPU enter deeper 997 * C states. 998 */ 999 rx_queue->zero_rx_packet_count++; 1000 1001 if (rx_queue->zero_rx_packet_count <= 1002 MIN_ZERO_POLL_COUNT) 1003 continue; 1004 1005 rx_queue->idle_hint = power_idle_heuristic( 1006 rx_queue->zero_rx_packet_count); 1007 lcore_rx_idle_count++; 1008 } else { 1009 rx_queue->zero_rx_packet_count = 0; 1010 } 1011 1012 /* Prefetch first packets */ 1013 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1014 rte_prefetch0(rte_pktmbuf_mtod( 1015 pkts_burst[j], void *)); 1016 } 1017 1018 /* Prefetch and forward already prefetched packets */ 1019 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1020 rte_prefetch0(rte_pktmbuf_mtod( 1021 pkts_burst[j + PREFETCH_OFFSET], 1022 void *)); 1023 l3fwd_simple_forward( 1024 pkts_burst[j], portid, qconf); 1025 } 1026 1027 /* Forward remaining prefetched packets */ 1028 for (; j < nb_rx; j++) { 1029 l3fwd_simple_forward( 1030 pkts_burst[j], portid, qconf); 1031 } 1032 } 1033 1034 if (unlikely(lcore_rx_idle_count == qconf->n_rx_queue)) { 1035 /** 1036 * All Rx queues empty in recent consecutive polls, 1037 * sleep in a conservative manner, meaning sleep as 1038 * less as possible. 1039 */ 1040 for (i = 1, 1041 lcore_idle_hint = qconf->rx_queue_list[0].idle_hint; 1042 i < qconf->n_rx_queue; ++i) { 1043 rx_queue = &(qconf->rx_queue_list[i]); 1044 if (rx_queue->idle_hint < lcore_idle_hint) 1045 lcore_idle_hint = rx_queue->idle_hint; 1046 } 1047 1048 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1049 /** 1050 * execute "pause" instruction to avoid context 1051 * switch which generally take hundred of 1052 * microseconds for short sleep. 1053 */ 1054 rte_delay_us(lcore_idle_hint); 1055 else { 1056 /* suspend until rx interrupt triggers */ 1057 if (intr_en) { 1058 turn_on_off_intr(qconf, 1); 1059 sleep_until_rx_interrupt( 1060 qconf->n_rx_queue, 1061 lcore_id); 1062 turn_on_off_intr(qconf, 0); 1063 /** 1064 * start receiving packets immediately 1065 */ 1066 if (likely(!is_done())) 1067 goto start_rx; 1068 } 1069 } 1070 stats[lcore_id].sleep_time += lcore_idle_hint; 1071 } 1072 } 1073 1074 return 0; 1075 } 1076 /* >8 End of main processing loop. */ 1077 1078 /* main processing loop */ 1079 static int 1080 main_telemetry_loop(__rte_unused void *dummy) 1081 { 1082 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1083 unsigned int lcore_id; 1084 uint64_t prev_tsc, diff_tsc, cur_tsc, prev_tel_tsc; 1085 int i, j, nb_rx; 1086 uint8_t queueid; 1087 uint16_t portid; 1088 struct lcore_conf *qconf; 1089 struct lcore_rx_queue *rx_queue; 1090 uint64_t ep_nep[2] = {0}, fp_nfp[2] = {0}; 1091 uint64_t poll_count; 1092 enum busy_rate br; 1093 1094 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 1095 US_PER_S * BURST_TX_DRAIN_US; 1096 1097 poll_count = 0; 1098 prev_tsc = 0; 1099 prev_tel_tsc = 0; 1100 1101 lcore_id = rte_lcore_id(); 1102 qconf = &lcore_conf[lcore_id]; 1103 1104 if (qconf->n_rx_queue == 0) { 1105 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1106 lcore_id); 1107 return 0; 1108 } 1109 1110 RTE_LOG(INFO, L3FWD_POWER, "entering main telemetry loop on lcore %u\n", 1111 lcore_id); 1112 1113 for (i = 0; i < qconf->n_rx_queue; i++) { 1114 portid = qconf->rx_queue_list[i].port_id; 1115 queueid = qconf->rx_queue_list[i].queue_id; 1116 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1117 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1118 } 1119 1120 while (!is_done()) { 1121 1122 cur_tsc = rte_rdtsc(); 1123 /* 1124 * TX burst queue drain 1125 */ 1126 diff_tsc = cur_tsc - prev_tsc; 1127 if (unlikely(diff_tsc > drain_tsc)) { 1128 for (i = 0; i < qconf->n_tx_port; ++i) { 1129 portid = qconf->tx_port_id[i]; 1130 rte_eth_tx_buffer_flush(portid, 1131 qconf->tx_queue_id[portid], 1132 qconf->tx_buffer[portid]); 1133 } 1134 prev_tsc = cur_tsc; 1135 } 1136 1137 /* 1138 * Read packet from RX queues 1139 */ 1140 for (i = 0; i < qconf->n_rx_queue; ++i) { 1141 rx_queue = &(qconf->rx_queue_list[i]); 1142 portid = rx_queue->port_id; 1143 queueid = rx_queue->queue_id; 1144 1145 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1146 MAX_PKT_BURST); 1147 ep_nep[nb_rx == 0]++; 1148 fp_nfp[nb_rx == MAX_PKT_BURST]++; 1149 poll_count++; 1150 if (unlikely(nb_rx == 0)) 1151 continue; 1152 1153 /* Prefetch first packets */ 1154 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1155 rte_prefetch0(rte_pktmbuf_mtod( 1156 pkts_burst[j], void *)); 1157 } 1158 1159 /* Prefetch and forward already prefetched packets */ 1160 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1161 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1162 j + PREFETCH_OFFSET], void *)); 1163 l3fwd_simple_forward(pkts_burst[j], portid, 1164 qconf); 1165 } 1166 1167 /* Forward remaining prefetched packets */ 1168 for (; j < nb_rx; j++) { 1169 l3fwd_simple_forward(pkts_burst[j], portid, 1170 qconf); 1171 } 1172 } 1173 if (unlikely(poll_count >= DEFAULT_COUNT)) { 1174 diff_tsc = cur_tsc - prev_tel_tsc; 1175 if (diff_tsc >= MAX_CYCLES) { 1176 br = FULL; 1177 } else if (diff_tsc > MIN_CYCLES && 1178 diff_tsc < MAX_CYCLES) { 1179 br = (diff_tsc * 100) / MAX_CYCLES; 1180 } else { 1181 br = ZERO; 1182 } 1183 poll_count = 0; 1184 prev_tel_tsc = cur_tsc; 1185 /* update stats for telemetry */ 1186 rte_spinlock_lock(&stats[lcore_id].telemetry_lock); 1187 stats[lcore_id].ep_nep[0] = ep_nep[0]; 1188 stats[lcore_id].ep_nep[1] = ep_nep[1]; 1189 stats[lcore_id].fp_nfp[0] = fp_nfp[0]; 1190 stats[lcore_id].fp_nfp[1] = fp_nfp[1]; 1191 stats[lcore_id].br = br; 1192 rte_spinlock_unlock(&stats[lcore_id].telemetry_lock); 1193 } 1194 } 1195 1196 return 0; 1197 } 1198 1199 /* main processing loop */ 1200 static int 1201 main_legacy_loop(__rte_unused void *dummy) 1202 { 1203 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1204 unsigned lcore_id; 1205 uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz; 1206 uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power; 1207 int i, j, nb_rx; 1208 uint8_t queueid; 1209 uint16_t portid; 1210 struct lcore_conf *qconf; 1211 struct lcore_rx_queue *rx_queue; 1212 enum freq_scale_hint_t lcore_scaleup_hint; 1213 uint32_t lcore_rx_idle_count = 0; 1214 uint32_t lcore_idle_hint = 0; 1215 int intr_en = 0; 1216 1217 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; 1218 1219 prev_tsc = 0; 1220 hz = rte_get_timer_hz(); 1221 tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND; 1222 1223 lcore_id = rte_lcore_id(); 1224 qconf = &lcore_conf[lcore_id]; 1225 1226 if (qconf->n_rx_queue == 0) { 1227 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); 1228 return 0; 1229 } 1230 1231 RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id); 1232 1233 for (i = 0; i < qconf->n_rx_queue; i++) { 1234 portid = qconf->rx_queue_list[i].port_id; 1235 queueid = qconf->rx_queue_list[i].queue_id; 1236 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1237 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1238 } 1239 1240 /* add into event wait list */ 1241 if (event_register(qconf) == 0) 1242 intr_en = 1; 1243 else 1244 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 1245 1246 while (!is_done()) { 1247 stats[lcore_id].nb_iteration_looped++; 1248 1249 cur_tsc = rte_rdtsc(); 1250 cur_tsc_power = cur_tsc; 1251 1252 /* 1253 * TX burst queue drain 1254 */ 1255 diff_tsc = cur_tsc - prev_tsc; 1256 if (unlikely(diff_tsc > drain_tsc)) { 1257 for (i = 0; i < qconf->n_tx_port; ++i) { 1258 portid = qconf->tx_port_id[i]; 1259 rte_eth_tx_buffer_flush(portid, 1260 qconf->tx_queue_id[portid], 1261 qconf->tx_buffer[portid]); 1262 } 1263 prev_tsc = cur_tsc; 1264 } 1265 1266 diff_tsc_power = cur_tsc_power - prev_tsc_power; 1267 if (diff_tsc_power > tim_res_tsc) { 1268 rte_timer_manage(); 1269 prev_tsc_power = cur_tsc_power; 1270 } 1271 1272 start_rx: 1273 /* 1274 * Read packet from RX queues 1275 */ 1276 lcore_scaleup_hint = FREQ_CURRENT; 1277 lcore_rx_idle_count = 0; 1278 for (i = 0; i < qconf->n_rx_queue; ++i) { 1279 rx_queue = &(qconf->rx_queue_list[i]); 1280 rx_queue->idle_hint = 0; 1281 portid = rx_queue->port_id; 1282 queueid = rx_queue->queue_id; 1283 1284 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1285 MAX_PKT_BURST); 1286 1287 stats[lcore_id].nb_rx_processed += nb_rx; 1288 if (unlikely(nb_rx == 0)) { 1289 /** 1290 * no packet received from rx queue, try to 1291 * sleep for a while forcing CPU enter deeper 1292 * C states. 1293 */ 1294 rx_queue->zero_rx_packet_count++; 1295 1296 if (rx_queue->zero_rx_packet_count <= 1297 MIN_ZERO_POLL_COUNT) 1298 continue; 1299 1300 rx_queue->idle_hint = power_idle_heuristic(\ 1301 rx_queue->zero_rx_packet_count); 1302 lcore_rx_idle_count++; 1303 } else { 1304 rx_queue->zero_rx_packet_count = 0; 1305 1306 /** 1307 * do not scale up frequency immediately as 1308 * user to kernel space communication is costly 1309 * which might impact packet I/O for received 1310 * packets. 1311 */ 1312 rx_queue->freq_up_hint = 1313 power_freq_scaleup_heuristic(lcore_id, 1314 portid, queueid); 1315 } 1316 1317 /* Prefetch first packets */ 1318 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1319 rte_prefetch0(rte_pktmbuf_mtod( 1320 pkts_burst[j], void *)); 1321 } 1322 1323 /* Prefetch and forward already prefetched packets */ 1324 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1325 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1326 j + PREFETCH_OFFSET], void *)); 1327 l3fwd_simple_forward(pkts_burst[j], portid, 1328 qconf); 1329 } 1330 1331 /* Forward remaining prefetched packets */ 1332 for (; j < nb_rx; j++) { 1333 l3fwd_simple_forward(pkts_burst[j], portid, 1334 qconf); 1335 } 1336 } 1337 1338 if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) { 1339 for (i = 1, lcore_scaleup_hint = 1340 qconf->rx_queue_list[0].freq_up_hint; 1341 i < qconf->n_rx_queue; ++i) { 1342 rx_queue = &(qconf->rx_queue_list[i]); 1343 if (rx_queue->freq_up_hint > 1344 lcore_scaleup_hint) 1345 lcore_scaleup_hint = 1346 rx_queue->freq_up_hint; 1347 } 1348 1349 if (lcore_scaleup_hint == FREQ_HIGHEST) { 1350 if (rte_power_freq_max) 1351 rte_power_freq_max(lcore_id); 1352 } else if (lcore_scaleup_hint == FREQ_HIGHER) { 1353 if (rte_power_freq_up) 1354 rte_power_freq_up(lcore_id); 1355 } 1356 } else { 1357 /** 1358 * All Rx queues empty in recent consecutive polls, 1359 * sleep in a conservative manner, meaning sleep as 1360 * less as possible. 1361 */ 1362 for (i = 1, lcore_idle_hint = 1363 qconf->rx_queue_list[0].idle_hint; 1364 i < qconf->n_rx_queue; ++i) { 1365 rx_queue = &(qconf->rx_queue_list[i]); 1366 if (rx_queue->idle_hint < lcore_idle_hint) 1367 lcore_idle_hint = rx_queue->idle_hint; 1368 } 1369 1370 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1371 /** 1372 * execute "pause" instruction to avoid context 1373 * switch which generally take hundred of 1374 * microseconds for short sleep. 1375 */ 1376 rte_delay_us(lcore_idle_hint); 1377 else { 1378 /* suspend until rx interrupt triggers */ 1379 if (intr_en) { 1380 turn_on_off_intr(qconf, 1); 1381 sleep_until_rx_interrupt( 1382 qconf->n_rx_queue, 1383 lcore_id); 1384 turn_on_off_intr(qconf, 0); 1385 /** 1386 * start receiving packets immediately 1387 */ 1388 if (likely(!is_done())) 1389 goto start_rx; 1390 } 1391 } 1392 stats[lcore_id].sleep_time += lcore_idle_hint; 1393 } 1394 } 1395 1396 return 0; 1397 } 1398 1399 static int 1400 check_lcore_params(void) 1401 { 1402 uint8_t queue, lcore; 1403 uint16_t i; 1404 int socketid; 1405 1406 for (i = 0; i < nb_lcore_params; ++i) { 1407 queue = lcore_params[i].queue_id; 1408 if (queue >= MAX_RX_QUEUE_PER_PORT) { 1409 printf("invalid queue number: %hhu\n", queue); 1410 return -1; 1411 } 1412 lcore = lcore_params[i].lcore_id; 1413 if (!rte_lcore_is_enabled(lcore)) { 1414 printf("error: lcore %hhu is not enabled in lcore " 1415 "mask\n", lcore); 1416 return -1; 1417 } 1418 if ((socketid = rte_lcore_to_socket_id(lcore) != 0) && 1419 (numa_on == 0)) { 1420 printf("warning: lcore %hhu is on socket %d with numa " 1421 "off\n", lcore, socketid); 1422 } 1423 if (app_mode == APP_MODE_TELEMETRY && lcore == rte_lcore_id()) { 1424 printf("cannot enable main core %d in config for telemetry mode\n", 1425 rte_lcore_id()); 1426 return -1; 1427 } 1428 } 1429 return 0; 1430 } 1431 1432 static int 1433 check_port_config(void) 1434 { 1435 unsigned portid; 1436 uint16_t i; 1437 1438 for (i = 0; i < nb_lcore_params; ++i) { 1439 portid = lcore_params[i].port_id; 1440 if ((enabled_port_mask & (1 << portid)) == 0) { 1441 printf("port %u is not enabled in port mask\n", 1442 portid); 1443 return -1; 1444 } 1445 if (!rte_eth_dev_is_valid_port(portid)) { 1446 printf("port %u is not present on the board\n", 1447 portid); 1448 return -1; 1449 } 1450 } 1451 return 0; 1452 } 1453 1454 static uint8_t 1455 get_port_n_rx_queues(const uint16_t port) 1456 { 1457 int queue = -1; 1458 uint16_t i; 1459 1460 for (i = 0; i < nb_lcore_params; ++i) { 1461 if (lcore_params[i].port_id == port && 1462 lcore_params[i].queue_id > queue) 1463 queue = lcore_params[i].queue_id; 1464 } 1465 return (uint8_t)(++queue); 1466 } 1467 1468 static int 1469 init_lcore_rx_queues(void) 1470 { 1471 uint16_t i, nb_rx_queue; 1472 uint8_t lcore; 1473 1474 for (i = 0; i < nb_lcore_params; ++i) { 1475 lcore = lcore_params[i].lcore_id; 1476 nb_rx_queue = lcore_conf[lcore].n_rx_queue; 1477 if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { 1478 printf("error: too many queues (%u) for lcore: %u\n", 1479 (unsigned)nb_rx_queue + 1, (unsigned)lcore); 1480 return -1; 1481 } else { 1482 lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = 1483 lcore_params[i].port_id; 1484 lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = 1485 lcore_params[i].queue_id; 1486 lcore_conf[lcore].n_rx_queue++; 1487 } 1488 } 1489 return 0; 1490 } 1491 1492 /* display usage */ 1493 static void 1494 print_usage(const char *prgname) 1495 { 1496 printf ("%s [EAL options] -- -p PORTMASK -P" 1497 " [--config (port,queue,lcore)[,(port,queue,lcore]]" 1498 " [--high-perf-cores CORELIST" 1499 " [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index]]" 1500 " [--max-pkt-len PKTLEN]\n" 1501 " -p PORTMASK: hexadecimal bitmask of ports to configure\n" 1502 " -P: enable promiscuous mode\n" 1503 " -u: set min/max frequency for uncore to minimum value\n" 1504 " -U: set min/max frequency for uncore to maximum value\n" 1505 " -i (frequency index): set min/max frequency for uncore to specified frequency index\n" 1506 " --config (port,queue,lcore): rx queues configuration\n" 1507 " --high-perf-cores CORELIST: list of high performance cores\n" 1508 " --perf-config: similar as config, cores specified as indices" 1509 " for bins containing high or regular performance cores\n" 1510 " --no-numa: optional, disable numa awareness\n" 1511 " --max-pkt-len PKTLEN: maximum packet length in decimal (64-9600)\n" 1512 " --parse-ptype: parse packet type by software\n" 1513 " --legacy: use legacy interrupt-based scaling\n" 1514 " --telemetry: enable telemetry mode, to update" 1515 " empty polls, full polls, and core busyness to telemetry\n" 1516 " --interrupt-only: enable interrupt-only mode\n" 1517 " --pmd-mgmt MODE: enable PMD power management mode. " 1518 "Currently supported modes: baseline, monitor, pause, scale\n" 1519 " --max-empty-polls MAX_EMPTY_POLLS: number of empty polls to" 1520 " wait before entering sleep state\n" 1521 " --pause-duration DURATION: set the duration, in microseconds," 1522 " of the pause callback\n" 1523 " --scale-freq-min FREQ_MIN: set minimum frequency for scaling mode for" 1524 " all application lcores (FREQ_MIN must be in kHz, in increments of 100MHz)\n" 1525 " --scale-freq-max FREQ_MAX: set maximum frequency for scaling mode for" 1526 " all application lcores (FREQ_MAX must be in kHz, in increments of 100MHz)\n", 1527 prgname); 1528 } 1529 1530 static int 1531 parse_int(const char *opt) 1532 { 1533 char *end = NULL; 1534 unsigned long val; 1535 1536 /* parse integer string */ 1537 val = strtoul(opt, &end, 10); 1538 if ((opt[0] == '\0') || (end == NULL) || (*end != '\0')) 1539 return -1; 1540 1541 return val; 1542 } 1543 1544 static int parse_max_pkt_len(const char *pktlen) 1545 { 1546 char *end = NULL; 1547 unsigned long len; 1548 1549 /* parse decimal string */ 1550 len = strtoul(pktlen, &end, 10); 1551 if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0')) 1552 return -1; 1553 1554 if (len == 0) 1555 return -1; 1556 1557 return len; 1558 } 1559 1560 static int 1561 parse_uncore_options(enum uncore_choice choice, const char *argument) 1562 { 1563 unsigned int die, pkg, max_pkg, max_die; 1564 int ret = 0; 1565 ret = rte_power_set_uncore_env(RTE_UNCORE_PM_ENV_AUTO_DETECT); 1566 if (ret < 0) { 1567 RTE_LOG(INFO, L3FWD_POWER, "Failed to set uncore env\n"); 1568 return ret; 1569 } 1570 1571 max_pkg = rte_power_uncore_get_num_pkgs(); 1572 if (max_pkg == 0) 1573 return -1; 1574 1575 for (pkg = 0; pkg < max_pkg; pkg++) { 1576 max_die = rte_power_uncore_get_num_dies(pkg); 1577 if (max_die == 0) 1578 return -1; 1579 for (die = 0; die < max_die; die++) { 1580 ret = rte_power_uncore_init(pkg, die); 1581 if (ret == -1) { 1582 RTE_LOG(INFO, L3FWD_POWER, "Unable to initialize uncore for pkg %02u die %02u\n" 1583 , pkg, die); 1584 return ret; 1585 } 1586 if (choice == UNCORE_MIN) { 1587 ret = rte_power_uncore_freq_min(pkg, die); 1588 if (ret == -1) { 1589 RTE_LOG(INFO, L3FWD_POWER, 1590 "Unable to set the uncore min/max to minimum uncore frequency value for pkg %02u die %02u\n" 1591 , pkg, die); 1592 return ret; 1593 } 1594 } else if (choice == UNCORE_MAX) { 1595 ret = rte_power_uncore_freq_max(pkg, die); 1596 if (ret == -1) { 1597 RTE_LOG(INFO, L3FWD_POWER, 1598 "Unable to set uncore min/max to maximum uncore frequency value for pkg %02u die %02u\n" 1599 , pkg, die); 1600 return ret; 1601 } 1602 } else if (choice == UNCORE_IDX) { 1603 char *ptr = NULL; 1604 int frequency_index = strtol(argument, &ptr, 10); 1605 if (argument == ptr) { 1606 RTE_LOG(INFO, L3FWD_POWER, "Index given is not a valid number."); 1607 return -1; 1608 } 1609 int freq_array_len = rte_power_uncore_get_num_freqs(pkg, die); 1610 if (frequency_index > freq_array_len - 1) { 1611 RTE_LOG(INFO, L3FWD_POWER, 1612 "Frequency index given out of range, please choose a value from 0 to %d.\n", 1613 freq_array_len); 1614 return -1; 1615 } 1616 ret = rte_power_set_uncore_freq(pkg, die, frequency_index); 1617 if (ret == -1) { 1618 RTE_LOG(INFO, L3FWD_POWER, 1619 "Unable to set min/max uncore index value for pkg %02u die %02u\n", 1620 pkg, die); 1621 return ret; 1622 } 1623 } else { 1624 RTE_LOG(INFO, L3FWD_POWER, "Uncore choice provided invalid\n"); 1625 return -1; 1626 } 1627 } 1628 } 1629 1630 RTE_LOG(INFO, L3FWD_POWER, "Successfully set max/min/index uncore frequency.\n"); 1631 return ret; 1632 } 1633 1634 static int 1635 parse_portmask(const char *portmask) 1636 { 1637 char *end = NULL; 1638 unsigned long pm; 1639 1640 /* parse hexadecimal string */ 1641 pm = strtoul(portmask, &end, 16); 1642 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 1643 return 0; 1644 1645 return pm; 1646 } 1647 1648 static int 1649 parse_config(const char *q_arg) 1650 { 1651 char s[256]; 1652 const char *p, *p0 = q_arg; 1653 char *end; 1654 enum fieldnames { 1655 FLD_PORT = 0, 1656 FLD_QUEUE, 1657 FLD_LCORE, 1658 _NUM_FLD 1659 }; 1660 unsigned long int_fld[_NUM_FLD]; 1661 char *str_fld[_NUM_FLD]; 1662 int i; 1663 unsigned size; 1664 1665 nb_lcore_params = 0; 1666 1667 while ((p = strchr(p0,'(')) != NULL) { 1668 ++p; 1669 if((p0 = strchr(p,')')) == NULL) 1670 return -1; 1671 1672 size = p0 - p; 1673 if(size >= sizeof(s)) 1674 return -1; 1675 1676 snprintf(s, sizeof(s), "%.*s", size, p); 1677 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != 1678 _NUM_FLD) 1679 return -1; 1680 for (i = 0; i < _NUM_FLD; i++){ 1681 errno = 0; 1682 int_fld[i] = strtoul(str_fld[i], &end, 0); 1683 if (errno != 0 || end == str_fld[i] || int_fld[i] > 1684 255) 1685 return -1; 1686 } 1687 if (nb_lcore_params >= MAX_LCORE_PARAMS) { 1688 printf("exceeded max number of lcore params: %hu\n", 1689 nb_lcore_params); 1690 return -1; 1691 } 1692 lcore_params_array[nb_lcore_params].port_id = 1693 (uint8_t)int_fld[FLD_PORT]; 1694 lcore_params_array[nb_lcore_params].queue_id = 1695 (uint8_t)int_fld[FLD_QUEUE]; 1696 lcore_params_array[nb_lcore_params].lcore_id = 1697 (uint8_t)int_fld[FLD_LCORE]; 1698 ++nb_lcore_params; 1699 } 1700 lcore_params = lcore_params_array; 1701 1702 return 0; 1703 } 1704 1705 static int 1706 parse_pmd_mgmt_config(const char *name) 1707 { 1708 #define PMD_MGMT_MONITOR "monitor" 1709 #define PMD_MGMT_PAUSE "pause" 1710 #define PMD_MGMT_SCALE "scale" 1711 #define PMD_MGMT_BASELINE "baseline" 1712 1713 if (strncmp(PMD_MGMT_MONITOR, name, sizeof(PMD_MGMT_MONITOR)) == 0) { 1714 pmgmt_type = RTE_POWER_MGMT_TYPE_MONITOR; 1715 return 0; 1716 } 1717 1718 if (strncmp(PMD_MGMT_PAUSE, name, sizeof(PMD_MGMT_PAUSE)) == 0) { 1719 pmgmt_type = RTE_POWER_MGMT_TYPE_PAUSE; 1720 return 0; 1721 } 1722 1723 if (strncmp(PMD_MGMT_SCALE, name, sizeof(PMD_MGMT_SCALE)) == 0) { 1724 pmgmt_type = RTE_POWER_MGMT_TYPE_SCALE; 1725 return 0; 1726 } 1727 if (strncmp(PMD_MGMT_BASELINE, name, sizeof(PMD_MGMT_BASELINE)) == 0) { 1728 baseline_enabled = true; 1729 return 0; 1730 } 1731 /* unknown PMD power management mode */ 1732 return -1; 1733 } 1734 1735 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" 1736 #define CMD_LINE_OPT_LEGACY "legacy" 1737 #define CMD_LINE_OPT_INTERRUPT_ONLY "interrupt-only" 1738 #define CMD_LINE_OPT_TELEMETRY "telemetry" 1739 #define CMD_LINE_OPT_PMD_MGMT "pmd-mgmt" 1740 #define CMD_LINE_OPT_MAX_PKT_LEN "max-pkt-len" 1741 #define CMD_LINE_OPT_MAX_EMPTY_POLLS "max-empty-polls" 1742 #define CMD_LINE_OPT_PAUSE_DURATION "pause-duration" 1743 #define CMD_LINE_OPT_SCALE_FREQ_MIN "scale-freq-min" 1744 #define CMD_LINE_OPT_SCALE_FREQ_MAX "scale-freq-max" 1745 1746 /* Parse the argument given in the command line of the application */ 1747 static int 1748 parse_args(int argc, char **argv) 1749 { 1750 int opt, ret; 1751 char **argvopt; 1752 int option_index; 1753 char *prgname = argv[0]; 1754 static struct option lgopts[] = { 1755 {"config", 1, 0, 0}, 1756 {"perf-config", 1, 0, 0}, 1757 {"high-perf-cores", 1, 0, 0}, 1758 {"no-numa", 0, 0, 0}, 1759 {CMD_LINE_OPT_MAX_PKT_LEN, 1, 0, 0}, 1760 {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, 1761 {CMD_LINE_OPT_LEGACY, 0, 0, 0}, 1762 {CMD_LINE_OPT_TELEMETRY, 0, 0, 0}, 1763 {CMD_LINE_OPT_INTERRUPT_ONLY, 0, 0, 0}, 1764 {CMD_LINE_OPT_PMD_MGMT, 1, 0, 0}, 1765 {CMD_LINE_OPT_MAX_EMPTY_POLLS, 1, 0, 0}, 1766 {CMD_LINE_OPT_PAUSE_DURATION, 1, 0, 0}, 1767 {CMD_LINE_OPT_SCALE_FREQ_MIN, 1, 0, 0}, 1768 {CMD_LINE_OPT_SCALE_FREQ_MAX, 1, 0, 0}, 1769 {NULL, 0, 0, 0} 1770 }; 1771 1772 argvopt = argv; 1773 1774 while ((opt = getopt_long(argc, argvopt, "p:PuUi:", 1775 lgopts, &option_index)) != EOF) { 1776 1777 switch (opt) { 1778 /* portmask */ 1779 case 'p': 1780 enabled_port_mask = parse_portmask(optarg); 1781 if (enabled_port_mask == 0) { 1782 printf("invalid portmask\n"); 1783 print_usage(prgname); 1784 return -1; 1785 } 1786 break; 1787 case 'P': 1788 printf("Promiscuous mode selected\n"); 1789 promiscuous_on = 1; 1790 break; 1791 case 'u': 1792 enabled_uncore = parse_uncore_options(UNCORE_MIN, NULL); 1793 if (enabled_uncore < 0) { 1794 print_usage(prgname); 1795 return -1; 1796 } 1797 break; 1798 case 'U': 1799 enabled_uncore = parse_uncore_options(UNCORE_MAX, NULL); 1800 if (enabled_uncore < 0) { 1801 print_usage(prgname); 1802 return -1; 1803 } 1804 break; 1805 case 'i': 1806 enabled_uncore = parse_uncore_options(UNCORE_IDX, optarg); 1807 if (enabled_uncore < 0) { 1808 print_usage(prgname); 1809 return -1; 1810 } 1811 break; 1812 /* long options */ 1813 case 0: 1814 if (!strncmp(lgopts[option_index].name, "config", 6)) { 1815 ret = parse_config(optarg); 1816 if (ret) { 1817 printf("invalid config\n"); 1818 print_usage(prgname); 1819 return -1; 1820 } 1821 } 1822 1823 if (!strncmp(lgopts[option_index].name, 1824 "perf-config", 11)) { 1825 ret = parse_perf_config(optarg); 1826 if (ret) { 1827 printf("invalid perf-config\n"); 1828 print_usage(prgname); 1829 return -1; 1830 } 1831 } 1832 1833 if (!strncmp(lgopts[option_index].name, 1834 "high-perf-cores", 15)) { 1835 ret = parse_perf_core_list(optarg); 1836 if (ret) { 1837 printf("invalid high-perf-cores\n"); 1838 print_usage(prgname); 1839 return -1; 1840 } 1841 } 1842 1843 if (!strncmp(lgopts[option_index].name, 1844 "no-numa", 7)) { 1845 printf("numa is disabled \n"); 1846 numa_on = 0; 1847 } 1848 1849 if (!strncmp(lgopts[option_index].name, 1850 CMD_LINE_OPT_LEGACY, 1851 sizeof(CMD_LINE_OPT_LEGACY))) { 1852 if (app_mode != APP_MODE_DEFAULT) { 1853 printf(" legacy mode is mutually exclusive with other modes\n"); 1854 return -1; 1855 } 1856 app_mode = APP_MODE_LEGACY; 1857 printf("legacy mode is enabled\n"); 1858 } 1859 1860 if (!strncmp(lgopts[option_index].name, 1861 CMD_LINE_OPT_TELEMETRY, 1862 sizeof(CMD_LINE_OPT_TELEMETRY))) { 1863 if (app_mode != APP_MODE_DEFAULT) { 1864 printf(" telemetry mode is mutually exclusive with other modes\n"); 1865 return -1; 1866 } 1867 app_mode = APP_MODE_TELEMETRY; 1868 printf("telemetry mode is enabled\n"); 1869 } 1870 1871 if (!strncmp(lgopts[option_index].name, 1872 CMD_LINE_OPT_PMD_MGMT, 1873 sizeof(CMD_LINE_OPT_PMD_MGMT))) { 1874 if (app_mode != APP_MODE_DEFAULT) { 1875 printf(" power mgmt mode is mutually exclusive with other modes\n"); 1876 return -1; 1877 } 1878 if (parse_pmd_mgmt_config(optarg) < 0) { 1879 printf(" Invalid PMD power management mode: %s\n", 1880 optarg); 1881 return -1; 1882 } 1883 app_mode = APP_MODE_PMD_MGMT; 1884 printf("PMD power mgmt mode is enabled\n"); 1885 } 1886 if (!strncmp(lgopts[option_index].name, 1887 CMD_LINE_OPT_INTERRUPT_ONLY, 1888 sizeof(CMD_LINE_OPT_INTERRUPT_ONLY))) { 1889 if (app_mode != APP_MODE_DEFAULT) { 1890 printf(" interrupt-only mode is mutually exclusive with other modes\n"); 1891 return -1; 1892 } 1893 app_mode = APP_MODE_INTERRUPT; 1894 printf("interrupt-only mode is enabled\n"); 1895 } 1896 1897 if (!strncmp(lgopts[option_index].name, 1898 CMD_LINE_OPT_MAX_PKT_LEN, 1899 sizeof(CMD_LINE_OPT_MAX_PKT_LEN))) { 1900 printf("Custom frame size is configured\n"); 1901 max_pkt_len = parse_max_pkt_len(optarg); 1902 } 1903 1904 if (!strncmp(lgopts[option_index].name, 1905 CMD_LINE_OPT_PARSE_PTYPE, 1906 sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { 1907 printf("soft parse-ptype is enabled\n"); 1908 parse_ptype = 1; 1909 } 1910 1911 if (!strncmp(lgopts[option_index].name, 1912 CMD_LINE_OPT_MAX_EMPTY_POLLS, 1913 sizeof(CMD_LINE_OPT_MAX_EMPTY_POLLS))) { 1914 printf("Maximum empty polls configured\n"); 1915 max_empty_polls = parse_int(optarg); 1916 } 1917 1918 if (!strncmp(lgopts[option_index].name, 1919 CMD_LINE_OPT_PAUSE_DURATION, 1920 sizeof(CMD_LINE_OPT_PAUSE_DURATION))) { 1921 printf("Pause duration configured\n"); 1922 pause_duration = parse_int(optarg); 1923 } 1924 1925 if (!strncmp(lgopts[option_index].name, 1926 CMD_LINE_OPT_SCALE_FREQ_MIN, 1927 sizeof(CMD_LINE_OPT_SCALE_FREQ_MIN))) { 1928 printf("Scaling frequency minimum configured\n"); 1929 scale_freq_min = parse_int(optarg); 1930 } 1931 1932 if (!strncmp(lgopts[option_index].name, 1933 CMD_LINE_OPT_SCALE_FREQ_MAX, 1934 sizeof(CMD_LINE_OPT_SCALE_FREQ_MAX))) { 1935 printf("Scaling frequency maximum configured\n"); 1936 scale_freq_max = parse_int(optarg); 1937 } 1938 1939 break; 1940 1941 default: 1942 print_usage(prgname); 1943 return -1; 1944 } 1945 } 1946 1947 if (optind >= 0) 1948 argv[optind-1] = prgname; 1949 1950 ret = optind-1; 1951 optind = 1; /* reset getopt lib */ 1952 return ret; 1953 } 1954 1955 static void 1956 print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr) 1957 { 1958 char buf[RTE_ETHER_ADDR_FMT_SIZE]; 1959 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr); 1960 printf("%s%s", name, buf); 1961 } 1962 1963 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1964 static void 1965 setup_hash(int socketid) 1966 { 1967 struct rte_hash_parameters ipv4_l3fwd_hash_params = { 1968 .name = NULL, 1969 .entries = L3FWD_HASH_ENTRIES, 1970 .key_len = sizeof(struct ipv4_5tuple), 1971 .hash_func = DEFAULT_HASH_FUNC, 1972 .hash_func_init_val = 0, 1973 }; 1974 1975 struct rte_hash_parameters ipv6_l3fwd_hash_params = { 1976 .name = NULL, 1977 .entries = L3FWD_HASH_ENTRIES, 1978 .key_len = sizeof(struct ipv6_5tuple), 1979 .hash_func = DEFAULT_HASH_FUNC, 1980 .hash_func_init_val = 0, 1981 }; 1982 1983 unsigned i; 1984 int ret; 1985 char s[64]; 1986 1987 /* create ipv4 hash */ 1988 snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); 1989 ipv4_l3fwd_hash_params.name = s; 1990 ipv4_l3fwd_hash_params.socket_id = socketid; 1991 ipv4_l3fwd_lookup_struct[socketid] = 1992 rte_hash_create(&ipv4_l3fwd_hash_params); 1993 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 1994 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 1995 "socket %d\n", socketid); 1996 1997 /* create ipv6 hash */ 1998 snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); 1999 ipv6_l3fwd_hash_params.name = s; 2000 ipv6_l3fwd_hash_params.socket_id = socketid; 2001 ipv6_l3fwd_lookup_struct[socketid] = 2002 rte_hash_create(&ipv6_l3fwd_hash_params); 2003 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 2004 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 2005 "socket %d\n", socketid); 2006 2007 2008 /* populate the ipv4 hash */ 2009 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 2010 ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid], 2011 (void *) &ipv4_l3fwd_route_array[i].key); 2012 if (ret < 0) { 2013 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 2014 "l3fwd hash on socket %d\n", i, socketid); 2015 } 2016 ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out; 2017 printf("Hash: Adding key\n"); 2018 print_ipv4_key(ipv4_l3fwd_route_array[i].key); 2019 } 2020 2021 /* populate the ipv6 hash */ 2022 for (i = 0; i < RTE_DIM(ipv6_l3fwd_route_array); i++) { 2023 ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid], 2024 (void *) &ipv6_l3fwd_route_array[i].key); 2025 if (ret < 0) { 2026 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 2027 "l3fwd hash on socket %d\n", i, socketid); 2028 } 2029 ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out; 2030 printf("Hash: Adding key\n"); 2031 print_ipv6_key(ipv6_l3fwd_route_array[i].key); 2032 } 2033 } 2034 #endif 2035 2036 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 2037 static void 2038 setup_lpm(int socketid) 2039 { 2040 unsigned i; 2041 int ret; 2042 char s[64]; 2043 2044 /* create the LPM table */ 2045 struct rte_lpm_config lpm_ipv4_config; 2046 2047 lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; 2048 lpm_ipv4_config.number_tbl8s = 256; 2049 lpm_ipv4_config.flags = 0; 2050 2051 snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); 2052 ipv4_l3fwd_lookup_struct[socketid] = 2053 rte_lpm_create(s, socketid, &lpm_ipv4_config); 2054 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 2055 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 2056 " on socket %d\n", socketid); 2057 2058 /* populate the LPM table */ 2059 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 2060 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], 2061 ipv4_l3fwd_route_array[i].ip, 2062 ipv4_l3fwd_route_array[i].depth, 2063 ipv4_l3fwd_route_array[i].if_out); 2064 2065 if (ret < 0) { 2066 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 2067 "l3fwd LPM table on socket %d\n", 2068 i, socketid); 2069 } 2070 2071 printf("LPM: Adding route 0x%08x / %d (%d)\n", 2072 (unsigned)ipv4_l3fwd_route_array[i].ip, 2073 ipv4_l3fwd_route_array[i].depth, 2074 ipv4_l3fwd_route_array[i].if_out); 2075 } 2076 } 2077 #endif 2078 2079 static int 2080 init_mem(unsigned nb_mbuf) 2081 { 2082 struct lcore_conf *qconf; 2083 int socketid; 2084 unsigned lcore_id; 2085 char s[64]; 2086 2087 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2088 if (rte_lcore_is_enabled(lcore_id) == 0) 2089 continue; 2090 2091 if (numa_on) 2092 socketid = rte_lcore_to_socket_id(lcore_id); 2093 else 2094 socketid = 0; 2095 2096 if (socketid >= NB_SOCKETS) { 2097 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is " 2098 "out of range %d\n", socketid, 2099 lcore_id, NB_SOCKETS); 2100 } 2101 if (pktmbuf_pool[socketid] == NULL) { 2102 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 2103 pktmbuf_pool[socketid] = 2104 rte_pktmbuf_pool_create(s, nb_mbuf, 2105 MEMPOOL_CACHE_SIZE, 0, 2106 RTE_MBUF_DEFAULT_BUF_SIZE, 2107 socketid); 2108 if (pktmbuf_pool[socketid] == NULL) 2109 rte_exit(EXIT_FAILURE, 2110 "Cannot init mbuf pool on socket %d\n", 2111 socketid); 2112 else 2113 printf("Allocated mbuf pool on socket %d\n", 2114 socketid); 2115 2116 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 2117 setup_lpm(socketid); 2118 #else 2119 setup_hash(socketid); 2120 #endif 2121 } 2122 qconf = &lcore_conf[lcore_id]; 2123 qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid]; 2124 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2125 qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid]; 2126 #endif 2127 } 2128 return 0; 2129 } 2130 2131 /* Check the link status of all ports in up to 9s, and print them finally */ 2132 static void 2133 check_all_ports_link_status(uint32_t port_mask) 2134 { 2135 #define CHECK_INTERVAL 100 /* 100ms */ 2136 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 2137 uint8_t count, all_ports_up, print_flag = 0; 2138 uint16_t portid; 2139 struct rte_eth_link link; 2140 int ret; 2141 char link_status_text[RTE_ETH_LINK_MAX_STR_LEN]; 2142 2143 printf("\nChecking link status"); 2144 fflush(stdout); 2145 for (count = 0; count <= MAX_CHECK_TIME; count++) { 2146 all_ports_up = 1; 2147 RTE_ETH_FOREACH_DEV(portid) { 2148 if ((port_mask & (1 << portid)) == 0) 2149 continue; 2150 memset(&link, 0, sizeof(link)); 2151 ret = rte_eth_link_get_nowait(portid, &link); 2152 if (ret < 0) { 2153 all_ports_up = 0; 2154 if (print_flag == 1) 2155 printf("Port %u link get failed: %s\n", 2156 portid, rte_strerror(-ret)); 2157 continue; 2158 } 2159 /* print link status if flag set */ 2160 if (print_flag == 1) { 2161 rte_eth_link_to_str(link_status_text, 2162 sizeof(link_status_text), &link); 2163 printf("Port %d %s\n", portid, 2164 link_status_text); 2165 continue; 2166 } 2167 /* clear all_ports_up flag if any link down */ 2168 if (link.link_status == RTE_ETH_LINK_DOWN) { 2169 all_ports_up = 0; 2170 break; 2171 } 2172 } 2173 /* after finally printing all link status, get out */ 2174 if (print_flag == 1) 2175 break; 2176 2177 if (all_ports_up == 0) { 2178 printf("."); 2179 fflush(stdout); 2180 rte_delay_ms(CHECK_INTERVAL); 2181 } 2182 2183 /* set the print_flag if all ports up or timeout */ 2184 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 2185 print_flag = 1; 2186 printf("done\n"); 2187 } 2188 } 2189 } 2190 2191 static int check_ptype(uint16_t portid) 2192 { 2193 int i, ret; 2194 int ptype_l3_ipv4 = 0; 2195 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2196 int ptype_l3_ipv6 = 0; 2197 #endif 2198 uint32_t ptype_mask = RTE_PTYPE_L3_MASK; 2199 2200 ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0); 2201 if (ret <= 0) 2202 return 0; 2203 2204 uint32_t ptypes[ret]; 2205 2206 ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret); 2207 for (i = 0; i < ret; ++i) { 2208 if (ptypes[i] & RTE_PTYPE_L3_IPV4) 2209 ptype_l3_ipv4 = 1; 2210 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2211 if (ptypes[i] & RTE_PTYPE_L3_IPV6) 2212 ptype_l3_ipv6 = 1; 2213 #endif 2214 } 2215 2216 if (ptype_l3_ipv4 == 0) 2217 printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid); 2218 2219 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2220 if (ptype_l3_ipv6 == 0) 2221 printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid); 2222 #endif 2223 2224 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 2225 if (ptype_l3_ipv4) 2226 #else /* APP_LOOKUP_EXACT_MATCH */ 2227 if (ptype_l3_ipv4 && ptype_l3_ipv6) 2228 #endif 2229 return 1; 2230 2231 return 0; 2232 2233 } 2234 2235 static int 2236 init_power_library(void) 2237 { 2238 enum power_management_env env; 2239 unsigned int lcore_id; 2240 int ret = 0; 2241 2242 RTE_LCORE_FOREACH(lcore_id) { 2243 /* init power management library */ 2244 ret = rte_power_init(lcore_id); 2245 if (ret) { 2246 RTE_LOG(ERR, L3FWD_POWER, 2247 "Library initialization failed on core %u\n", 2248 lcore_id); 2249 return ret; 2250 } 2251 /* we're not supporting the VM channel mode */ 2252 env = rte_power_get_env(); 2253 if (env != PM_ENV_ACPI_CPUFREQ && 2254 env != PM_ENV_PSTATE_CPUFREQ && 2255 env != PM_ENV_AMD_PSTATE_CPUFREQ && 2256 env != PM_ENV_CPPC_CPUFREQ) { 2257 RTE_LOG(ERR, L3FWD_POWER, 2258 "Only ACPI and PSTATE mode are supported\n"); 2259 return -1; 2260 } 2261 } 2262 return ret; 2263 } 2264 2265 static int 2266 deinit_power_library(void) 2267 { 2268 unsigned int lcore_id, max_pkg, max_die, die, pkg; 2269 int ret = 0; 2270 2271 RTE_LCORE_FOREACH(lcore_id) { 2272 /* deinit power management library */ 2273 ret = rte_power_exit(lcore_id); 2274 if (ret) { 2275 RTE_LOG(ERR, L3FWD_POWER, 2276 "Library deinitialization failed on core %u\n", 2277 lcore_id); 2278 return ret; 2279 } 2280 } 2281 2282 /* if uncore option was set */ 2283 if (enabled_uncore == 0) { 2284 max_pkg = rte_power_uncore_get_num_pkgs(); 2285 if (max_pkg == 0) 2286 return -1; 2287 for (pkg = 0; pkg < max_pkg; pkg++) { 2288 max_die = rte_power_uncore_get_num_dies(pkg); 2289 if (max_die == 0) 2290 return -1; 2291 for (die = 0; die < max_die; die++) { 2292 ret = rte_power_uncore_exit(pkg, die); 2293 if (ret < 0) { 2294 RTE_LOG(ERR, L3FWD_POWER, "Failed to exit uncore deinit successfully for pkg %02u die %02u\n" 2295 , pkg, die); 2296 return -1; 2297 } 2298 } 2299 } 2300 } 2301 return ret; 2302 } 2303 2304 static void 2305 get_current_stat_values(uint64_t *values) 2306 { 2307 unsigned int lcore_id = rte_lcore_id(); 2308 struct lcore_conf *qconf; 2309 uint64_t app_eps = 0, app_fps = 0, app_br = 0; 2310 uint64_t count = 0; 2311 2312 RTE_LCORE_FOREACH_WORKER(lcore_id) { 2313 qconf = &lcore_conf[lcore_id]; 2314 if (qconf->n_rx_queue == 0) 2315 continue; 2316 count++; 2317 rte_spinlock_lock(&stats[lcore_id].telemetry_lock); 2318 app_eps += stats[lcore_id].ep_nep[1]; 2319 app_fps += stats[lcore_id].fp_nfp[1]; 2320 app_br += stats[lcore_id].br; 2321 rte_spinlock_unlock(&stats[lcore_id].telemetry_lock); 2322 } 2323 2324 if (count > 0) { 2325 values[0] = app_eps/count; 2326 values[1] = app_fps/count; 2327 values[2] = app_br/count; 2328 } else 2329 memset(values, 0, sizeof(uint64_t) * NUM_TELSTATS); 2330 2331 } 2332 2333 static void 2334 update_telemetry(__rte_unused struct rte_timer *tim, 2335 __rte_unused void *arg) 2336 { 2337 int ret; 2338 uint64_t values[NUM_TELSTATS] = {0}; 2339 2340 get_current_stat_values(values); 2341 ret = rte_metrics_update_values(RTE_METRICS_GLOBAL, telstats_index, 2342 values, RTE_DIM(values)); 2343 if (ret < 0) 2344 RTE_LOG(WARNING, L3FWD_POWER, "failed to update metrics\n"); 2345 } 2346 2347 static int 2348 handle_app_stats(const char *cmd __rte_unused, 2349 const char *params __rte_unused, 2350 struct rte_tel_data *d) 2351 { 2352 uint64_t values[NUM_TELSTATS] = {0}; 2353 uint32_t i; 2354 2355 rte_tel_data_start_dict(d); 2356 get_current_stat_values(values); 2357 for (i = 0; i < NUM_TELSTATS; i++) 2358 rte_tel_data_add_dict_uint(d, telstats_strings[i].name, 2359 values[i]); 2360 return 0; 2361 } 2362 2363 static void 2364 telemetry_setup_timer(void) 2365 { 2366 int lcore_id = rte_lcore_id(); 2367 uint64_t hz = rte_get_timer_hz(); 2368 uint64_t ticks; 2369 2370 ticks = hz / TELEMETRY_INTERVALS_PER_SEC; 2371 rte_timer_reset_sync(&telemetry_timer, 2372 ticks, 2373 PERIODICAL, 2374 lcore_id, 2375 update_telemetry, 2376 NULL); 2377 } 2378 2379 static int 2380 launch_timer(unsigned int lcore_id) 2381 { 2382 int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms; 2383 2384 RTE_SET_USED(lcore_id); 2385 2386 2387 if (rte_get_main_lcore() != lcore_id) { 2388 rte_panic("timer on lcore:%d which is not main core:%d\n", 2389 lcore_id, 2390 rte_get_main_lcore()); 2391 } 2392 2393 RTE_LOG(INFO, L3FWD_POWER, "Bring up the Timer\n"); 2394 2395 telemetry_setup_timer(); 2396 2397 cycles_10ms = rte_get_timer_hz() / 100; 2398 2399 while (!is_done()) { 2400 cur_tsc = rte_rdtsc(); 2401 diff_tsc = cur_tsc - prev_tsc; 2402 if (diff_tsc > cycles_10ms) { 2403 rte_timer_manage(); 2404 prev_tsc = cur_tsc; 2405 cycles_10ms = rte_get_timer_hz() / 100; 2406 } 2407 } 2408 2409 RTE_LOG(INFO, L3FWD_POWER, "Timer_subsystem is done\n"); 2410 2411 return 0; 2412 } 2413 2414 static int 2415 autodetect_mode(void) 2416 { 2417 RTE_LOG(NOTICE, L3FWD_POWER, "Operating mode not specified, probing frequency scaling support...\n"); 2418 2419 /* 2420 * Empty poll and telemetry modes have to be specifically requested to 2421 * be enabled, but we can auto-detect between interrupt mode with or 2422 * without frequency scaling. Any of ACPI, pstate and CPPC can be used. 2423 */ 2424 if (rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ)) 2425 return APP_MODE_LEGACY; 2426 if (rte_power_check_env_supported(PM_ENV_PSTATE_CPUFREQ)) 2427 return APP_MODE_LEGACY; 2428 if (rte_power_check_env_supported(PM_ENV_AMD_PSTATE_CPUFREQ)) 2429 return APP_MODE_LEGACY; 2430 if (rte_power_check_env_supported(PM_ENV_CPPC_CPUFREQ)) 2431 return APP_MODE_LEGACY; 2432 2433 RTE_LOG(NOTICE, L3FWD_POWER, "Frequency scaling not supported, selecting interrupt-only mode\n"); 2434 2435 return APP_MODE_INTERRUPT; 2436 } 2437 2438 static const char * 2439 mode_to_str(enum appmode mode) 2440 { 2441 switch (mode) { 2442 case APP_MODE_LEGACY: 2443 return "legacy"; 2444 case APP_MODE_TELEMETRY: 2445 return "telemetry"; 2446 case APP_MODE_INTERRUPT: 2447 return "interrupt-only"; 2448 case APP_MODE_PMD_MGMT: 2449 return "pmd mgmt"; 2450 default: 2451 return "invalid"; 2452 } 2453 } 2454 2455 static uint32_t 2456 eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu) 2457 { 2458 uint32_t overhead_len; 2459 2460 if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu) 2461 overhead_len = max_rx_pktlen - max_mtu; 2462 else 2463 overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN; 2464 2465 return overhead_len; 2466 } 2467 2468 static int 2469 config_port_max_pkt_len(struct rte_eth_conf *conf, 2470 struct rte_eth_dev_info *dev_info) 2471 { 2472 uint32_t overhead_len; 2473 2474 if (max_pkt_len == 0) 2475 return 0; 2476 2477 if (max_pkt_len < RTE_ETHER_MIN_LEN || max_pkt_len > MAX_JUMBO_PKT_LEN) 2478 return -1; 2479 2480 overhead_len = eth_dev_get_overhead_len(dev_info->max_rx_pktlen, 2481 dev_info->max_mtu); 2482 conf->rxmode.mtu = max_pkt_len - overhead_len; 2483 2484 if (conf->rxmode.mtu > RTE_ETHER_MTU) 2485 conf->txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2486 2487 return 0; 2488 } 2489 2490 /* Power library initialized in the main routine. 8< */ 2491 int 2492 main(int argc, char **argv) 2493 { 2494 struct lcore_conf *qconf; 2495 struct rte_eth_dev_info dev_info; 2496 struct rte_eth_txconf *txconf; 2497 int ret; 2498 uint16_t nb_ports; 2499 uint16_t queueid; 2500 unsigned lcore_id; 2501 uint64_t hz; 2502 uint32_t n_tx_queue, nb_lcores; 2503 uint32_t dev_rxq_num, dev_txq_num; 2504 uint8_t nb_rx_queue, queue, socketid; 2505 uint16_t portid; 2506 const char *ptr_strings[NUM_TELSTATS]; 2507 2508 /* init EAL */ 2509 ret = rte_eal_init(argc, argv); 2510 if (ret < 0) 2511 rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); 2512 argc -= ret; 2513 argv += ret; 2514 2515 /* catch SIGINT and restore cpufreq governor to ondemand */ 2516 signal(SIGINT, signal_exit_now); 2517 2518 /* init RTE timer library to be used late */ 2519 rte_timer_subsystem_init(); 2520 2521 /* if we're running pmd-mgmt mode, don't default to baseline mode */ 2522 baseline_enabled = false; 2523 2524 /* parse application arguments (after the EAL ones) */ 2525 ret = parse_args(argc, argv); 2526 if (ret < 0) 2527 rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); 2528 2529 if (app_mode == APP_MODE_DEFAULT) 2530 app_mode = autodetect_mode(); 2531 2532 RTE_LOG(INFO, L3FWD_POWER, "Selected operation mode: %s\n", 2533 mode_to_str(app_mode)); 2534 2535 /* only legacy mode relies on power library */ 2536 if ((app_mode == APP_MODE_LEGACY) && init_power_library()) 2537 rte_exit(EXIT_FAILURE, "init_power_library failed\n"); 2538 2539 if (update_lcore_params() < 0) 2540 rte_exit(EXIT_FAILURE, "update_lcore_params failed\n"); 2541 2542 if (check_lcore_params() < 0) 2543 rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); 2544 2545 ret = init_lcore_rx_queues(); 2546 if (ret < 0) 2547 rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n"); 2548 2549 nb_ports = rte_eth_dev_count_avail(); 2550 2551 if (check_port_config() < 0) 2552 rte_exit(EXIT_FAILURE, "check_port_config failed\n"); 2553 2554 nb_lcores = rte_lcore_count(); 2555 2556 /* initialize all ports */ 2557 RTE_ETH_FOREACH_DEV(portid) { 2558 struct rte_eth_conf local_port_conf = port_conf; 2559 /* not all app modes need interrupts */ 2560 bool need_intr = app_mode == APP_MODE_LEGACY || 2561 app_mode == APP_MODE_INTERRUPT; 2562 2563 /* skip ports that are not enabled */ 2564 if ((enabled_port_mask & (1 << portid)) == 0) { 2565 printf("\nSkipping disabled port %d\n", portid); 2566 continue; 2567 } 2568 2569 /* init port */ 2570 printf("Initializing port %d ... ", portid ); 2571 fflush(stdout); 2572 2573 ret = rte_eth_dev_info_get(portid, &dev_info); 2574 if (ret != 0) 2575 rte_exit(EXIT_FAILURE, 2576 "Error during getting device (port %u) info: %s\n", 2577 portid, strerror(-ret)); 2578 2579 dev_rxq_num = dev_info.max_rx_queues; 2580 dev_txq_num = dev_info.max_tx_queues; 2581 2582 nb_rx_queue = get_port_n_rx_queues(portid); 2583 if (nb_rx_queue > dev_rxq_num) 2584 rte_exit(EXIT_FAILURE, 2585 "Cannot configure not existed rxq: " 2586 "port=%d\n", portid); 2587 2588 n_tx_queue = nb_lcores; 2589 if (n_tx_queue > dev_txq_num) 2590 n_tx_queue = dev_txq_num; 2591 printf("Creating queues: nb_rxq=%d nb_txq=%u... ", 2592 nb_rx_queue, (unsigned)n_tx_queue ); 2593 /* If number of Rx queue is 0, no need to enable Rx interrupt */ 2594 if (nb_rx_queue == 0) 2595 need_intr = false; 2596 2597 if (need_intr) 2598 local_port_conf.intr_conf.rxq = 1; 2599 2600 ret = rte_eth_dev_info_get(portid, &dev_info); 2601 if (ret != 0) 2602 rte_exit(EXIT_FAILURE, 2603 "Error during getting device (port %u) info: %s\n", 2604 portid, strerror(-ret)); 2605 2606 ret = config_port_max_pkt_len(&local_port_conf, &dev_info); 2607 if (ret != 0) 2608 rte_exit(EXIT_FAILURE, 2609 "Invalid max packet length: %u (port %u)\n", 2610 max_pkt_len, portid); 2611 2612 if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) 2613 local_port_conf.txmode.offloads |= 2614 RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2615 2616 local_port_conf.rx_adv_conf.rss_conf.rss_hf &= 2617 dev_info.flow_type_rss_offloads; 2618 if (local_port_conf.rx_adv_conf.rss_conf.rss_hf != 2619 port_conf.rx_adv_conf.rss_conf.rss_hf) { 2620 printf("Port %u modified RSS hash function based on hardware support," 2621 "requested:%#"PRIx64" configured:%#"PRIx64"\n", 2622 portid, 2623 port_conf.rx_adv_conf.rss_conf.rss_hf, 2624 local_port_conf.rx_adv_conf.rss_conf.rss_hf); 2625 } 2626 2627 if (local_port_conf.rx_adv_conf.rss_conf.rss_hf == 0) 2628 local_port_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE; 2629 local_port_conf.rxmode.offloads &= dev_info.rx_offload_capa; 2630 port_conf.rxmode.offloads = local_port_conf.rxmode.offloads; 2631 2632 ret = rte_eth_dev_configure(portid, nb_rx_queue, 2633 (uint16_t)n_tx_queue, &local_port_conf); 2634 if (ret < 0) 2635 rte_exit(EXIT_FAILURE, "Cannot configure device: " 2636 "err=%d, port=%d\n", ret, portid); 2637 2638 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, 2639 &nb_txd); 2640 if (ret < 0) 2641 rte_exit(EXIT_FAILURE, 2642 "Cannot adjust number of descriptors: err=%d, port=%d\n", 2643 ret, portid); 2644 2645 ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 2646 if (ret < 0) 2647 rte_exit(EXIT_FAILURE, 2648 "Cannot get MAC address: err=%d, port=%d\n", 2649 ret, portid); 2650 2651 print_ethaddr(" Address:", &ports_eth_addr[portid]); 2652 printf(", "); 2653 2654 /* init memory */ 2655 ret = init_mem(NB_MBUF); 2656 if (ret < 0) 2657 rte_exit(EXIT_FAILURE, "init_mem failed\n"); 2658 2659 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2660 if (rte_lcore_is_enabled(lcore_id) == 0) 2661 continue; 2662 2663 /* Initialize TX buffers */ 2664 qconf = &lcore_conf[lcore_id]; 2665 qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", 2666 RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, 2667 rte_eth_dev_socket_id(portid)); 2668 if (qconf->tx_buffer[portid] == NULL) 2669 rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n", 2670 portid); 2671 2672 rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST); 2673 } 2674 2675 /* init one TX queue per couple (lcore,port) */ 2676 queueid = 0; 2677 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2678 if (rte_lcore_is_enabled(lcore_id) == 0) 2679 continue; 2680 2681 if (queueid >= dev_txq_num) 2682 continue; 2683 2684 if (numa_on) 2685 socketid = \ 2686 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2687 else 2688 socketid = 0; 2689 2690 printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); 2691 fflush(stdout); 2692 2693 txconf = &dev_info.default_txconf; 2694 txconf->offloads = local_port_conf.txmode.offloads; 2695 ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, 2696 socketid, txconf); 2697 if (ret < 0) 2698 rte_exit(EXIT_FAILURE, 2699 "rte_eth_tx_queue_setup: err=%d, " 2700 "port=%d\n", ret, portid); 2701 2702 qconf = &lcore_conf[lcore_id]; 2703 qconf->tx_queue_id[portid] = queueid; 2704 queueid++; 2705 2706 qconf->tx_port_id[qconf->n_tx_port] = portid; 2707 qconf->n_tx_port++; 2708 } 2709 printf("\n"); 2710 } 2711 2712 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2713 if (rte_lcore_is_enabled(lcore_id) == 0) 2714 continue; 2715 2716 if (app_mode == APP_MODE_LEGACY) { 2717 /* init timer structures for each enabled lcore */ 2718 rte_timer_init(&power_timers[lcore_id]); 2719 hz = rte_get_timer_hz(); 2720 rte_timer_reset(&power_timers[lcore_id], 2721 hz/TIMER_NUMBER_PER_SECOND, 2722 SINGLE, lcore_id, 2723 power_timer_cb, NULL); 2724 } 2725 qconf = &lcore_conf[lcore_id]; 2726 printf("\nInitializing rx queues on lcore %u ... ", lcore_id ); 2727 fflush(stdout); 2728 2729 /* init RX queues */ 2730 for(queue = 0; queue < qconf->n_rx_queue; ++queue) { 2731 struct rte_eth_rxconf rxq_conf; 2732 2733 portid = qconf->rx_queue_list[queue].port_id; 2734 queueid = qconf->rx_queue_list[queue].queue_id; 2735 2736 if (numa_on) 2737 socketid = \ 2738 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2739 else 2740 socketid = 0; 2741 2742 printf("rxq=%d,%d,%d ", portid, queueid, socketid); 2743 fflush(stdout); 2744 2745 ret = rte_eth_dev_info_get(portid, &dev_info); 2746 if (ret != 0) 2747 rte_exit(EXIT_FAILURE, 2748 "Error during getting device (port %u) info: %s\n", 2749 portid, strerror(-ret)); 2750 2751 rxq_conf = dev_info.default_rxconf; 2752 rxq_conf.offloads = port_conf.rxmode.offloads; 2753 ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, 2754 socketid, &rxq_conf, 2755 pktmbuf_pool[socketid]); 2756 if (ret < 0) 2757 rte_exit(EXIT_FAILURE, 2758 "rte_eth_rx_queue_setup: err=%d, " 2759 "port=%d\n", ret, portid); 2760 2761 if (parse_ptype) { 2762 if (add_cb_parse_ptype(portid, queueid) < 0) 2763 rte_exit(EXIT_FAILURE, 2764 "Fail to add ptype cb\n"); 2765 } 2766 2767 if (app_mode == APP_MODE_PMD_MGMT && !baseline_enabled) { 2768 /* Set power_pmd_mgmt configs passed by user */ 2769 rte_power_pmd_mgmt_set_emptypoll_max(max_empty_polls); 2770 ret = rte_power_pmd_mgmt_set_pause_duration(pause_duration); 2771 if (ret < 0) 2772 rte_exit(EXIT_FAILURE, 2773 "Error setting pause_duration: err=%d, lcore=%d\n", 2774 ret, lcore_id); 2775 2776 ret = rte_power_pmd_mgmt_set_scaling_freq_min(lcore_id, 2777 scale_freq_min); 2778 if (ret < 0) 2779 rte_exit(EXIT_FAILURE, 2780 "Error setting scaling freq min: err=%d, lcore=%d\n", 2781 ret, lcore_id); 2782 2783 ret = rte_power_pmd_mgmt_set_scaling_freq_max(lcore_id, 2784 scale_freq_max); 2785 if (ret < 0) 2786 rte_exit(EXIT_FAILURE, 2787 "Error setting scaling freq max: err=%d, lcore %d\n", 2788 ret, lcore_id); 2789 2790 ret = rte_power_ethdev_pmgmt_queue_enable( 2791 lcore_id, portid, queueid, 2792 pmgmt_type); 2793 if (ret < 0) 2794 rte_exit(EXIT_FAILURE, 2795 "rte_power_ethdev_pmgmt_queue_enable: err=%d, port=%d\n", 2796 ret, portid); 2797 } 2798 } 2799 } 2800 /* >8 End of power library initialization. */ 2801 2802 printf("\n"); 2803 2804 /* start ports */ 2805 RTE_ETH_FOREACH_DEV(portid) { 2806 if ((enabled_port_mask & (1 << portid)) == 0) { 2807 continue; 2808 } 2809 /* Start device */ 2810 ret = rte_eth_dev_start(portid); 2811 if (ret < 0) 2812 rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, " 2813 "port=%d\n", ret, portid); 2814 /* 2815 * If enabled, put device in promiscuous mode. 2816 * This allows IO forwarding mode to forward packets 2817 * to itself through 2 cross-connected ports of the 2818 * target machine. 2819 */ 2820 if (promiscuous_on) { 2821 ret = rte_eth_promiscuous_enable(portid); 2822 if (ret != 0) 2823 rte_exit(EXIT_FAILURE, 2824 "rte_eth_promiscuous_enable: err=%s, port=%u\n", 2825 rte_strerror(-ret), portid); 2826 } 2827 /* initialize spinlock for each port */ 2828 rte_spinlock_init(&(locks[portid])); 2829 2830 if (!parse_ptype) 2831 if (!check_ptype(portid)) 2832 rte_exit(EXIT_FAILURE, 2833 "PMD can not provide needed ptypes\n"); 2834 } 2835 2836 check_all_ports_link_status(enabled_port_mask); 2837 2838 /* launch per-lcore init on every lcore */ 2839 if (app_mode == APP_MODE_LEGACY) { 2840 rte_eal_mp_remote_launch(main_legacy_loop, NULL, CALL_MAIN); 2841 } else if (app_mode == APP_MODE_TELEMETRY) { 2842 unsigned int i; 2843 2844 /* Init metrics library */ 2845 rte_metrics_init(rte_socket_id()); 2846 /** Register stats with metrics library */ 2847 for (i = 0; i < NUM_TELSTATS; i++) 2848 ptr_strings[i] = telstats_strings[i].name; 2849 2850 ret = rte_metrics_reg_names(ptr_strings, NUM_TELSTATS); 2851 if (ret >= 0) 2852 telstats_index = ret; 2853 else 2854 rte_exit(EXIT_FAILURE, "failed to register metrics names"); 2855 2856 RTE_LCORE_FOREACH_WORKER(lcore_id) { 2857 rte_spinlock_init(&stats[lcore_id].telemetry_lock); 2858 } 2859 rte_timer_init(&telemetry_timer); 2860 rte_telemetry_register_cmd("/l3fwd-power/stats", 2861 handle_app_stats, 2862 "Returns global power stats. Parameters: None"); 2863 rte_eal_mp_remote_launch(main_telemetry_loop, NULL, 2864 SKIP_MAIN); 2865 } else if (app_mode == APP_MODE_INTERRUPT) { 2866 rte_eal_mp_remote_launch(main_intr_loop, NULL, CALL_MAIN); 2867 } else if (app_mode == APP_MODE_PMD_MGMT) { 2868 /* reuse telemetry loop for PMD power management mode */ 2869 rte_eal_mp_remote_launch(main_telemetry_loop, NULL, CALL_MAIN); 2870 } 2871 2872 if (app_mode == APP_MODE_TELEMETRY) 2873 launch_timer(rte_lcore_id()); 2874 2875 RTE_LCORE_FOREACH_WORKER(lcore_id) { 2876 if (rte_eal_wait_lcore(lcore_id) < 0) 2877 return -1; 2878 } 2879 2880 if (app_mode == APP_MODE_PMD_MGMT) { 2881 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2882 if (rte_lcore_is_enabled(lcore_id) == 0) 2883 continue; 2884 qconf = &lcore_conf[lcore_id]; 2885 for (queue = 0; queue < qconf->n_rx_queue; ++queue) { 2886 portid = qconf->rx_queue_list[queue].port_id; 2887 queueid = qconf->rx_queue_list[queue].queue_id; 2888 2889 rte_power_ethdev_pmgmt_queue_disable(lcore_id, 2890 portid, queueid); 2891 } 2892 } 2893 } 2894 2895 RTE_ETH_FOREACH_DEV(portid) 2896 { 2897 if ((enabled_port_mask & (1 << portid)) == 0) 2898 continue; 2899 2900 ret = rte_eth_dev_stop(portid); 2901 if (ret != 0) 2902 RTE_LOG(ERR, L3FWD_POWER, "rte_eth_dev_stop: err=%d, port=%u\n", 2903 ret, portid); 2904 2905 rte_eth_dev_close(portid); 2906 } 2907 2908 if ((app_mode == APP_MODE_LEGACY) && deinit_power_library()) 2909 rte_exit(EXIT_FAILURE, "deinit_power_library failed\n"); 2910 2911 if (rte_eal_cleanup() < 0) 2912 RTE_LOG(ERR, L3FWD_POWER, "EAL cleanup failed\n"); 2913 2914 return 0; 2915 } 2916