1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdint.h> 8 #include <inttypes.h> 9 #include <sys/types.h> 10 #include <string.h> 11 #include <sys/queue.h> 12 #include <stdarg.h> 13 #include <errno.h> 14 #include <getopt.h> 15 #include <unistd.h> 16 #include <signal.h> 17 #include <math.h> 18 19 #include <rte_common.h> 20 #include <rte_byteorder.h> 21 #include <rte_log.h> 22 #include <rte_malloc.h> 23 #include <rte_memory.h> 24 #include <rte_memcpy.h> 25 #include <rte_eal.h> 26 #include <rte_launch.h> 27 #include <rte_atomic.h> 28 #include <rte_cycles.h> 29 #include <rte_prefetch.h> 30 #include <rte_lcore.h> 31 #include <rte_per_lcore.h> 32 #include <rte_branch_prediction.h> 33 #include <rte_interrupts.h> 34 #include <rte_random.h> 35 #include <rte_debug.h> 36 #include <rte_ether.h> 37 #include <rte_ethdev.h> 38 #include <rte_mempool.h> 39 #include <rte_mbuf.h> 40 #include <rte_ip.h> 41 #include <rte_tcp.h> 42 #include <rte_udp.h> 43 #include <rte_string_fns.h> 44 #include <rte_timer.h> 45 #include <rte_power.h> 46 #include <rte_spinlock.h> 47 #include <rte_power_empty_poll.h> 48 #include <rte_metrics.h> 49 #include <rte_telemetry.h> 50 #include <rte_power_pmd_mgmt.h> 51 52 #include "perf_core.h" 53 #include "main.h" 54 55 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 56 57 #define MAX_PKT_BURST 32 58 59 #define MIN_ZERO_POLL_COUNT 10 60 61 /* 100 ms interval */ 62 #define TIMER_NUMBER_PER_SECOND 10 63 /* (10ms) */ 64 #define INTERVALS_PER_SECOND 100 65 /* 100000 us */ 66 #define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND) 67 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25 68 69 #define APP_LOOKUP_EXACT_MATCH 0 70 #define APP_LOOKUP_LPM 1 71 #define DO_RFC_1812_CHECKS 72 73 #ifndef APP_LOOKUP_METHOD 74 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 75 #endif 76 77 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 78 #include <rte_hash.h> 79 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 80 #include <rte_lpm.h> 81 #else 82 #error "APP_LOOKUP_METHOD set to incorrect value" 83 #endif 84 85 #ifndef IPv6_BYTES 86 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ 87 "%02x%02x:%02x%02x:%02x%02x:%02x%02x" 88 #define IPv6_BYTES(addr) \ 89 addr[0], addr[1], addr[2], addr[3], \ 90 addr[4], addr[5], addr[6], addr[7], \ 91 addr[8], addr[9], addr[10], addr[11],\ 92 addr[12], addr[13],addr[14], addr[15] 93 #endif 94 95 #define MAX_JUMBO_PKT_LEN 9600 96 97 #define IPV6_ADDR_LEN 16 98 99 #define MEMPOOL_CACHE_SIZE 256 100 101 /* 102 * This expression is used to calculate the number of mbufs needed depending on 103 * user input, taking into account memory for rx and tx hardware rings, cache 104 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 105 * NB_MBUF never goes below a minimum value of 8192. 106 */ 107 108 #define NB_MBUF RTE_MAX ( \ 109 (nb_ports*nb_rx_queue*nb_rxd + \ 110 nb_ports*nb_lcores*MAX_PKT_BURST + \ 111 nb_ports*n_tx_queue*nb_txd + \ 112 nb_lcores*MEMPOOL_CACHE_SIZE), \ 113 (unsigned)8192) 114 115 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 116 117 #define NB_SOCKETS 8 118 119 /* Configure how many packets ahead to prefetch, when reading packets */ 120 #define PREFETCH_OFFSET 3 121 122 /* 123 * Configurable number of RX/TX ring descriptors 124 */ 125 #define RTE_TEST_RX_DESC_DEFAULT 1024 126 #define RTE_TEST_TX_DESC_DEFAULT 1024 127 128 /* 129 * These two thresholds were decided on by running the training algorithm on 130 * a 2.5GHz Xeon. 
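 * The absolute values are tied to that part's cycle budget; on a different
 * CPU it is presumably safer to re-run the training step (training_flag set
 * in the --empty-poll argument) than to reuse these numbers as-is.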
These defaults can be overridden by supplying non-zero values 131 * for the med_threshold and high_threshold parameters on the command line. 132 */ 133 #define EMPTY_POLL_MED_THRESHOLD 350000UL 134 #define EMPTY_POLL_HGH_THRESHOLD 580000UL 135 136 #define NUM_TELSTATS RTE_DIM(telstats_strings) 137 138 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 139 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 140 141 /* ethernet addresses of ports */ 142 static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 143 144 /* ethernet addresses of ports */ 145 static rte_spinlock_t locks[RTE_MAX_ETHPORTS]; 146 147 /* mask of enabled ports */ 148 static uint32_t enabled_port_mask = 0; 149 /* Ports set in promiscuous mode off by default. */ 150 static int promiscuous_on = 0; 151 /* NUMA is enabled by default. */ 152 static int numa_on = 1; 153 static bool empty_poll_stop; 154 static bool empty_poll_train; 155 volatile bool quit_signal; 156 static struct ep_params *ep_params; 157 static struct ep_policy policy; 158 static long ep_med_edpi, ep_hgh_edpi; 159 /* timer to update telemetry every 500ms */ 160 static struct rte_timer telemetry_timer; 161 162 /* stats index returned by metrics lib */ 163 int telstats_index; 164 165 struct telstats_name { 166 char name[RTE_ETH_XSTATS_NAME_SIZE]; 167 }; 168 169 /* telemetry stats to be reported */ 170 const struct telstats_name telstats_strings[] = { 171 {"empty_poll"}, 172 {"full_poll"}, 173 {"busy_percent"} 174 }; 175 176 /* core busyness in percentage */ 177 enum busy_rate { 178 ZERO = 0, 179 PARTIAL = 50, 180 FULL = 100 181 }; 182 183 /* reference poll count to measure core busyness */ 184 #define DEFAULT_COUNT 10000 185 /* 186 * reference CYCLES to be used to 187 * measure core busyness based on poll count 188 */ 189 #define MIN_CYCLES 1500000ULL 190 #define MAX_CYCLES 22000000ULL 191 192 /* (500ms) */ 193 #define TELEMETRY_INTERVALS_PER_SEC 2 194 195 static int parse_ptype; /**< Parse packet type using rx callback, and */ 196 /**< disabled by default */ 197 198 enum appmode { 199 APP_MODE_DEFAULT = 0, 200 APP_MODE_LEGACY, 201 APP_MODE_EMPTY_POLL, 202 APP_MODE_TELEMETRY, 203 APP_MODE_INTERRUPT, 204 APP_MODE_PMD_MGMT 205 }; 206 207 enum appmode app_mode; 208 209 static enum rte_power_pmd_mgmt_type pmgmt_type; 210 bool baseline_enabled; 211 212 enum freq_scale_hint_t 213 { 214 FREQ_LOWER = -1, 215 FREQ_CURRENT = 0, 216 FREQ_HIGHER = 1, 217 FREQ_HIGHEST = 2 218 }; 219 220 struct lcore_rx_queue { 221 uint16_t port_id; 222 uint8_t queue_id; 223 enum freq_scale_hint_t freq_up_hint; 224 uint32_t zero_rx_packet_count; 225 uint32_t idle_hint; 226 } __rte_cache_aligned; 227 228 #define MAX_RX_QUEUE_PER_LCORE 16 229 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 230 #define MAX_RX_QUEUE_PER_PORT 128 231 232 #define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 233 234 235 struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; 236 static struct lcore_params lcore_params_array_default[] = { 237 {0, 0, 2}, 238 {0, 1, 2}, 239 {0, 2, 2}, 240 {1, 0, 2}, 241 {1, 1, 2}, 242 {1, 2, 2}, 243 {2, 0, 2}, 244 {3, 0, 3}, 245 {3, 1, 3}, 246 }; 247 248 struct lcore_params *lcore_params = lcore_params_array_default; 249 uint16_t nb_lcore_params = RTE_DIM(lcore_params_array_default); 250 251 static struct rte_eth_conf port_conf = { 252 .rxmode = { 253 .mq_mode = ETH_MQ_RX_RSS, 254 .max_rx_pkt_len = RTE_ETHER_MAX_LEN, 255 .split_hdr_size = 0, 256 .offloads = DEV_RX_OFFLOAD_CHECKSUM, 257 }, 258 .rx_adv_conf = { 259 .rss_conf = { 260 .rss_key = NULL, 261 .rss_hf = ETH_RSS_UDP, 262 }, 263 }, 
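	/*
	 * No special TX multi-queue mode is requested here;
	 * DEV_TX_OFFLOAD_MULTI_SEGS is only added to txmode.offloads at runtime
	 * when --enable-jumbo is given (see parse_args()).
	 */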
264 .txmode = { 265 .mq_mode = ETH_MQ_TX_NONE, 266 } 267 }; 268 269 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; 270 271 272 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 273 274 #ifdef RTE_ARCH_X86 275 #include <rte_hash_crc.h> 276 #define DEFAULT_HASH_FUNC rte_hash_crc 277 #else 278 #include <rte_jhash.h> 279 #define DEFAULT_HASH_FUNC rte_jhash 280 #endif 281 282 struct ipv4_5tuple { 283 uint32_t ip_dst; 284 uint32_t ip_src; 285 uint16_t port_dst; 286 uint16_t port_src; 287 uint8_t proto; 288 } __rte_packed; 289 290 struct ipv6_5tuple { 291 uint8_t ip_dst[IPV6_ADDR_LEN]; 292 uint8_t ip_src[IPV6_ADDR_LEN]; 293 uint16_t port_dst; 294 uint16_t port_src; 295 uint8_t proto; 296 } __rte_packed; 297 298 struct ipv4_l3fwd_route { 299 struct ipv4_5tuple key; 300 uint8_t if_out; 301 }; 302 303 struct ipv6_l3fwd_route { 304 struct ipv6_5tuple key; 305 uint8_t if_out; 306 }; 307 308 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 309 {{RTE_IPV4(100,10,0,1), RTE_IPV4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, 310 {{RTE_IPV4(100,20,0,2), RTE_IPV4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, 311 {{RTE_IPV4(100,30,0,3), RTE_IPV4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, 312 {{RTE_IPV4(100,40,0,4), RTE_IPV4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, 313 }; 314 315 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 316 { 317 { 318 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 319 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, 320 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 321 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, 322 1, 10, IPPROTO_UDP 323 }, 4 324 }, 325 }; 326 327 typedef struct rte_hash lookup_struct_t; 328 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 329 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 330 331 #define L3FWD_HASH_ENTRIES 1024 332 333 static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 334 static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 335 #endif 336 337 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 338 struct ipv4_l3fwd_route { 339 uint32_t ip; 340 uint8_t depth; 341 uint8_t if_out; 342 }; 343 344 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 345 {RTE_IPV4(1,1,1,0), 24, 0}, 346 {RTE_IPV4(2,1,1,0), 24, 1}, 347 {RTE_IPV4(3,1,1,0), 24, 2}, 348 {RTE_IPV4(4,1,1,0), 24, 3}, 349 {RTE_IPV4(5,1,1,0), 24, 4}, 350 {RTE_IPV4(6,1,1,0), 24, 5}, 351 {RTE_IPV4(7,1,1,0), 24, 6}, 352 {RTE_IPV4(8,1,1,0), 24, 7}, 353 }; 354 355 #define IPV4_L3FWD_LPM_MAX_RULES 1024 356 357 typedef struct rte_lpm lookup_struct_t; 358 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 359 #endif 360 361 struct lcore_conf { 362 uint16_t n_rx_queue; 363 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 364 uint16_t n_tx_port; 365 uint16_t tx_port_id[RTE_MAX_ETHPORTS]; 366 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 367 struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; 368 lookup_struct_t * ipv4_lookup_struct; 369 lookup_struct_t * ipv6_lookup_struct; 370 } __rte_cache_aligned; 371 372 struct lcore_stats { 373 /* total sleep time in ms since last frequency scaling down */ 374 uint32_t sleep_time; 375 /* number of long sleep recently */ 376 uint32_t nb_long_sleep; 377 /* freq. 
scaling up trend */ 378 uint32_t trend; 379 /* total packet processed recently */ 380 uint64_t nb_rx_processed; 381 /* total iterations looped recently */ 382 uint64_t nb_iteration_looped; 383 /* 384 * Represents empty and non empty polls 385 * of rte_eth_rx_burst(); 386 * ep_nep[0] holds non empty polls 387 * i.e. 0 < nb_rx <= MAX_BURST 388 * ep_nep[1] holds empty polls. 389 * i.e. nb_rx == 0 390 */ 391 uint64_t ep_nep[2]; 392 /* 393 * Represents full and empty+partial 394 * polls of rte_eth_rx_burst(); 395 * ep_nep[0] holds empty+partial polls. 396 * i.e. 0 <= nb_rx < MAX_BURST 397 * ep_nep[1] holds full polls 398 * i.e. nb_rx == MAX_BURST 399 */ 400 uint64_t fp_nfp[2]; 401 enum busy_rate br; 402 rte_spinlock_t telemetry_lock; 403 } __rte_cache_aligned; 404 405 static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned; 406 static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned; 407 static struct rte_timer power_timers[RTE_MAX_LCORE]; 408 409 static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count); 410 static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \ 411 unsigned int lcore_id, uint16_t port_id, uint16_t queue_id); 412 413 414 /* 415 * These defaults are using the max frequency index (1), a medium index (9) 416 * and a typical low frequency index (14). These can be adjusted to use 417 * different indexes using the relevant command line parameters. 418 */ 419 static uint8_t freq_tlb[] = {14, 9, 1}; 420 421 static int is_done(void) 422 { 423 return quit_signal; 424 } 425 426 /* exit signal handler */ 427 static void 428 signal_exit_now(int sigtype) 429 { 430 431 if (sigtype == SIGINT) 432 quit_signal = true; 433 434 } 435 436 /* Freqency scale down timer callback */ 437 static void 438 power_timer_cb(__rte_unused struct rte_timer *tim, 439 __rte_unused void *arg) 440 { 441 uint64_t hz; 442 float sleep_time_ratio; 443 unsigned lcore_id = rte_lcore_id(); 444 445 /* accumulate total execution time in us when callback is invoked */ 446 sleep_time_ratio = (float)(stats[lcore_id].sleep_time) / 447 (float)SCALING_PERIOD; 448 /** 449 * check whether need to scale down frequency a step if it sleep a lot. 450 */ 451 if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) { 452 if (rte_power_freq_down) 453 rte_power_freq_down(lcore_id); 454 } 455 else if ( (unsigned)(stats[lcore_id].nb_rx_processed / 456 stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) { 457 /** 458 * scale down a step if average packet per iteration less 459 * than expectation. 460 */ 461 if (rte_power_freq_down) 462 rte_power_freq_down(lcore_id); 463 } 464 465 /** 466 * initialize another timer according to current frequency to ensure 467 * timer interval is relatively fixed. 
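 * The timer runs in SINGLE (one-shot) mode and is re-armed on every expiry,
 * so the effective period is hz / TIMER_NUMBER_PER_SECOND ticks; e.g. with a
 * 2.5 GHz timer source that is 2.5e9 / 10 = 250,000,000 cycles, matching the
 * 100 ms window that SCALING_PERIOD assumes.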
468 */ 469 hz = rte_get_timer_hz(); 470 rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND, 471 SINGLE, lcore_id, power_timer_cb, NULL); 472 473 stats[lcore_id].nb_rx_processed = 0; 474 stats[lcore_id].nb_iteration_looped = 0; 475 476 stats[lcore_id].sleep_time = 0; 477 } 478 479 /* Enqueue a single packet, and send burst if queue is filled */ 480 static inline int 481 send_single_packet(struct rte_mbuf *m, uint16_t port) 482 { 483 uint32_t lcore_id; 484 struct lcore_conf *qconf; 485 486 lcore_id = rte_lcore_id(); 487 qconf = &lcore_conf[lcore_id]; 488 489 rte_eth_tx_buffer(port, qconf->tx_queue_id[port], 490 qconf->tx_buffer[port], m); 491 492 return 0; 493 } 494 495 #ifdef DO_RFC_1812_CHECKS 496 static inline int 497 is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len) 498 { 499 /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ 500 /* 501 * 1. The packet length reported by the Link Layer must be large 502 * enough to hold the minimum length legal IP datagram (20 bytes). 503 */ 504 if (link_len < sizeof(struct rte_ipv4_hdr)) 505 return -1; 506 507 /* 2. The IP checksum must be correct. */ 508 /* this is checked in H/W */ 509 510 /* 511 * 3. The IP version number must be 4. If the version number is not 4 512 * then the packet may be another version of IP, such as IPng or 513 * ST-II. 514 */ 515 if (((pkt->version_ihl) >> 4) != 4) 516 return -3; 517 /* 518 * 4. The IP header length field must be large enough to hold the 519 * minimum length legal IP datagram (20 bytes = 5 words). 520 */ 521 if ((pkt->version_ihl & 0xf) < 5) 522 return -4; 523 524 /* 525 * 5. The IP total length field must be large enough to hold the IP 526 * datagram header, whose length is specified in the IP header length 527 * field. 528 */ 529 if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr)) 530 return -5; 531 532 return 0; 533 } 534 #endif 535 536 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 537 static void 538 print_ipv4_key(struct ipv4_5tuple key) 539 { 540 printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, " 541 "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src, 542 key.port_dst, key.port_src, key.proto); 543 } 544 static void 545 print_ipv6_key(struct ipv6_5tuple key) 546 { 547 printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", " 548 "port dst = %d, port src = %d, proto = %d\n", 549 IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src), 550 key.port_dst, key.port_src, key.proto); 551 } 552 553 static inline uint16_t 554 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 555 lookup_struct_t * ipv4_l3fwd_lookup_struct) 556 { 557 struct ipv4_5tuple key; 558 struct rte_tcp_hdr *tcp; 559 struct rte_udp_hdr *udp; 560 int ret = 0; 561 562 key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); 563 key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); 564 key.proto = ipv4_hdr->next_proto_id; 565 566 switch (ipv4_hdr->next_proto_id) { 567 case IPPROTO_TCP: 568 tcp = (struct rte_tcp_hdr *)((unsigned char *)ipv4_hdr + 569 sizeof(struct rte_ipv4_hdr)); 570 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 571 key.port_src = rte_be_to_cpu_16(tcp->src_port); 572 break; 573 574 case IPPROTO_UDP: 575 udp = (struct rte_udp_hdr *)((unsigned char *)ipv4_hdr + 576 sizeof(struct rte_ipv4_hdr)); 577 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 578 key.port_src = rte_be_to_cpu_16(udp->src_port); 579 break; 580 581 default: 582 key.port_dst = 0; 583 key.port_src = 0; 584 break; 585 } 586 587 /* Find destination port */ 588 ret 
= rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); 589 return ((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]); 590 } 591 592 static inline uint16_t 593 get_ipv6_dst_port(struct rte_ipv6_hdr *ipv6_hdr, uint16_t portid, 594 lookup_struct_t *ipv6_l3fwd_lookup_struct) 595 { 596 struct ipv6_5tuple key; 597 struct rte_tcp_hdr *tcp; 598 struct rte_udp_hdr *udp; 599 int ret = 0; 600 601 memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); 602 memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); 603 604 key.proto = ipv6_hdr->proto; 605 606 switch (ipv6_hdr->proto) { 607 case IPPROTO_TCP: 608 tcp = (struct rte_tcp_hdr *)((unsigned char *) ipv6_hdr + 609 sizeof(struct rte_ipv6_hdr)); 610 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 611 key.port_src = rte_be_to_cpu_16(tcp->src_port); 612 break; 613 614 case IPPROTO_UDP: 615 udp = (struct rte_udp_hdr *)((unsigned char *) ipv6_hdr + 616 sizeof(struct rte_ipv6_hdr)); 617 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 618 key.port_src = rte_be_to_cpu_16(udp->src_port); 619 break; 620 621 default: 622 key.port_dst = 0; 623 key.port_src = 0; 624 break; 625 } 626 627 /* Find destination port */ 628 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 629 return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); 630 } 631 #endif 632 633 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 634 static inline uint16_t 635 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 636 lookup_struct_t *ipv4_l3fwd_lookup_struct) 637 { 638 uint32_t next_hop; 639 640 return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, 641 rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? 642 next_hop : portid); 643 } 644 #endif 645 646 static inline void 647 parse_ptype_one(struct rte_mbuf *m) 648 { 649 struct rte_ether_hdr *eth_hdr; 650 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 651 uint16_t ether_type; 652 653 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 654 ether_type = eth_hdr->ether_type; 655 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) 656 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 657 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) 658 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 659 660 m->packet_type = packet_type; 661 } 662 663 static uint16_t 664 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused, 665 struct rte_mbuf *pkts[], uint16_t nb_pkts, 666 uint16_t max_pkts __rte_unused, 667 void *user_param __rte_unused) 668 { 669 unsigned int i; 670 671 for (i = 0; i < nb_pkts; ++i) 672 parse_ptype_one(pkts[i]); 673 674 return nb_pkts; 675 } 676 677 static int 678 add_cb_parse_ptype(uint16_t portid, uint16_t queueid) 679 { 680 printf("Port %d: softly parse packet type info\n", portid); 681 if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL)) 682 return 0; 683 684 printf("Failed to add rx callback: port=%d\n", portid); 685 return -1; 686 } 687 688 static inline void 689 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid, 690 struct lcore_conf *qconf) 691 { 692 struct rte_ether_hdr *eth_hdr; 693 struct rte_ipv4_hdr *ipv4_hdr; 694 void *d_addr_bytes; 695 uint16_t dst_port; 696 697 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 698 699 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 700 /* Handle IPv4 headers.*/ 701 ipv4_hdr = 702 rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 703 sizeof(struct rte_ether_hdr)); 704 705 #ifdef DO_RFC_1812_CHECKS 706 /* Check to make sure the packet is valid (RFC1812) */ 707 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { 708 
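			/* Failed the RFC 1812 sanity checks above; drop the packet. */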
rte_pktmbuf_free(m); 709 return; 710 } 711 #endif 712 713 dst_port = get_ipv4_dst_port(ipv4_hdr, portid, 714 qconf->ipv4_lookup_struct); 715 if (dst_port >= RTE_MAX_ETHPORTS || 716 (enabled_port_mask & 1 << dst_port) == 0) 717 dst_port = portid; 718 719 /* 02:00:00:00:00:xx */ 720 d_addr_bytes = ð_hdr->dst_addr.addr_bytes[0]; 721 *((uint64_t *)d_addr_bytes) = 722 0x000000000002 + ((uint64_t)dst_port << 40); 723 724 #ifdef DO_RFC_1812_CHECKS 725 /* Update time to live and header checksum */ 726 --(ipv4_hdr->time_to_live); 727 ++(ipv4_hdr->hdr_checksum); 728 #endif 729 730 /* src addr */ 731 rte_ether_addr_copy(&ports_eth_addr[dst_port], 732 ð_hdr->src_addr); 733 734 send_single_packet(m, dst_port); 735 } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { 736 /* Handle IPv6 headers.*/ 737 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 738 struct rte_ipv6_hdr *ipv6_hdr; 739 740 ipv6_hdr = 741 rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, 742 sizeof(struct rte_ether_hdr)); 743 744 dst_port = get_ipv6_dst_port(ipv6_hdr, portid, 745 qconf->ipv6_lookup_struct); 746 747 if (dst_port >= RTE_MAX_ETHPORTS || 748 (enabled_port_mask & 1 << dst_port) == 0) 749 dst_port = portid; 750 751 /* 02:00:00:00:00:xx */ 752 d_addr_bytes = ð_hdr->dst_addr.addr_bytes[0]; 753 *((uint64_t *)d_addr_bytes) = 754 0x000000000002 + ((uint64_t)dst_port << 40); 755 756 /* src addr */ 757 rte_ether_addr_copy(&ports_eth_addr[dst_port], 758 ð_hdr->src_addr); 759 760 send_single_packet(m, dst_port); 761 #else 762 /* We don't currently handle IPv6 packets in LPM mode. */ 763 rte_pktmbuf_free(m); 764 #endif 765 } else 766 rte_pktmbuf_free(m); 767 768 } 769 770 #define MINIMUM_SLEEP_TIME 1 771 #define SUSPEND_THRESHOLD 300 772 773 static inline uint32_t 774 power_idle_heuristic(uint32_t zero_rx_packet_count) 775 { 776 /* If zero count is less than 100, sleep 1us */ 777 if (zero_rx_packet_count < SUSPEND_THRESHOLD) 778 return MINIMUM_SLEEP_TIME; 779 /* If zero count is less than 1000, sleep 100 us which is the 780 minimum latency switching from C3/C6 to C0 781 */ 782 else 783 return SUSPEND_THRESHOLD; 784 } 785 786 static inline enum freq_scale_hint_t 787 power_freq_scaleup_heuristic(unsigned lcore_id, 788 uint16_t port_id, 789 uint16_t queue_id) 790 { 791 uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id); 792 /** 793 * HW Rx queue size is 128 by default, Rx burst read at maximum 32 entries 794 * per iteration 795 */ 796 #define FREQ_GEAR1_RX_PACKET_THRESHOLD MAX_PKT_BURST 797 #define FREQ_GEAR2_RX_PACKET_THRESHOLD (MAX_PKT_BURST*2) 798 #define FREQ_GEAR3_RX_PACKET_THRESHOLD (MAX_PKT_BURST*3) 799 #define FREQ_UP_TREND1_ACC 1 800 #define FREQ_UP_TREND2_ACC 100 801 #define FREQ_UP_THRESHOLD 10000 802 803 if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) { 804 stats[lcore_id].trend = 0; 805 return FREQ_HIGHEST; 806 } else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD)) 807 stats[lcore_id].trend += FREQ_UP_TREND2_ACC; 808 else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD)) 809 stats[lcore_id].trend += FREQ_UP_TREND1_ACC; 810 811 if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) { 812 stats[lcore_id].trend = 0; 813 return FREQ_HIGHER; 814 } 815 816 return FREQ_CURRENT; 817 } 818 819 /** 820 * force polling thread sleep until one-shot rx interrupt triggers 821 * @param port_id 822 * Port id. 823 * @param queue_id 824 * Rx queue id. 
825 * @return 826 * 0 on success 827 */ 828 static int 829 sleep_until_rx_interrupt(int num, int lcore) 830 { 831 /* 832 * we want to track when we are woken up by traffic so that we can go 833 * back to sleep again without log spamming. Avoid cache line sharing 834 * to prevent threads stepping on each others' toes. 835 */ 836 static struct { 837 bool wakeup; 838 } __rte_cache_aligned status[RTE_MAX_LCORE]; 839 struct rte_epoll_event event[num]; 840 int n, i; 841 uint16_t port_id; 842 uint8_t queue_id; 843 void *data; 844 845 if (status[lcore].wakeup) { 846 RTE_LOG(INFO, L3FWD_POWER, 847 "lcore %u sleeps until interrupt triggers\n", 848 rte_lcore_id()); 849 } 850 851 n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, 10); 852 for (i = 0; i < n; i++) { 853 data = event[i].epdata.data; 854 port_id = ((uintptr_t)data) >> CHAR_BIT; 855 queue_id = ((uintptr_t)data) & 856 RTE_LEN2MASK(CHAR_BIT, uint8_t); 857 RTE_LOG(INFO, L3FWD_POWER, 858 "lcore %u is waked up from rx interrupt on" 859 " port %d queue %d\n", 860 rte_lcore_id(), port_id, queue_id); 861 } 862 status[lcore].wakeup = n != 0; 863 864 return 0; 865 } 866 867 static void turn_on_off_intr(struct lcore_conf *qconf, bool on) 868 { 869 int i; 870 struct lcore_rx_queue *rx_queue; 871 uint8_t queue_id; 872 uint16_t port_id; 873 874 for (i = 0; i < qconf->n_rx_queue; ++i) { 875 rx_queue = &(qconf->rx_queue_list[i]); 876 port_id = rx_queue->port_id; 877 queue_id = rx_queue->queue_id; 878 879 rte_spinlock_lock(&(locks[port_id])); 880 if (on) 881 rte_eth_dev_rx_intr_enable(port_id, queue_id); 882 else 883 rte_eth_dev_rx_intr_disable(port_id, queue_id); 884 rte_spinlock_unlock(&(locks[port_id])); 885 } 886 } 887 888 static int event_register(struct lcore_conf *qconf) 889 { 890 struct lcore_rx_queue *rx_queue; 891 uint8_t queueid; 892 uint16_t portid; 893 uint32_t data; 894 int ret; 895 int i; 896 897 for (i = 0; i < qconf->n_rx_queue; ++i) { 898 rx_queue = &(qconf->rx_queue_list[i]); 899 portid = rx_queue->port_id; 900 queueid = rx_queue->queue_id; 901 data = portid << CHAR_BIT | queueid; 902 903 ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid, 904 RTE_EPOLL_PER_THREAD, 905 RTE_INTR_EVENT_ADD, 906 (void *)((uintptr_t)data)); 907 if (ret) 908 return ret; 909 } 910 911 return 0; 912 } 913 914 /* Main processing loop. 
8< */ 915 static int main_intr_loop(__rte_unused void *dummy) 916 { 917 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 918 unsigned int lcore_id; 919 uint64_t prev_tsc, diff_tsc, cur_tsc; 920 int i, j, nb_rx; 921 uint8_t queueid; 922 uint16_t portid; 923 struct lcore_conf *qconf; 924 struct lcore_rx_queue *rx_queue; 925 uint32_t lcore_rx_idle_count = 0; 926 uint32_t lcore_idle_hint = 0; 927 int intr_en = 0; 928 929 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 930 US_PER_S * BURST_TX_DRAIN_US; 931 932 prev_tsc = 0; 933 934 lcore_id = rte_lcore_id(); 935 qconf = &lcore_conf[lcore_id]; 936 937 if (qconf->n_rx_queue == 0) { 938 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 939 lcore_id); 940 return 0; 941 } 942 943 RTE_LOG(INFO, L3FWD_POWER, "entering main interrupt loop on lcore %u\n", 944 lcore_id); 945 946 for (i = 0; i < qconf->n_rx_queue; i++) { 947 portid = qconf->rx_queue_list[i].port_id; 948 queueid = qconf->rx_queue_list[i].queue_id; 949 RTE_LOG(INFO, L3FWD_POWER, 950 " -- lcoreid=%u portid=%u rxqueueid=%hhu\n", 951 lcore_id, portid, queueid); 952 } 953 954 /* add into event wait list */ 955 if (event_register(qconf) == 0) 956 intr_en = 1; 957 else 958 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 959 960 while (!is_done()) { 961 stats[lcore_id].nb_iteration_looped++; 962 963 cur_tsc = rte_rdtsc(); 964 965 /* 966 * TX burst queue drain 967 */ 968 diff_tsc = cur_tsc - prev_tsc; 969 if (unlikely(diff_tsc > drain_tsc)) { 970 for (i = 0; i < qconf->n_tx_port; ++i) { 971 portid = qconf->tx_port_id[i]; 972 rte_eth_tx_buffer_flush(portid, 973 qconf->tx_queue_id[portid], 974 qconf->tx_buffer[portid]); 975 } 976 prev_tsc = cur_tsc; 977 } 978 979 start_rx: 980 /* 981 * Read packet from RX queues 982 */ 983 lcore_rx_idle_count = 0; 984 for (i = 0; i < qconf->n_rx_queue; ++i) { 985 rx_queue = &(qconf->rx_queue_list[i]); 986 rx_queue->idle_hint = 0; 987 portid = rx_queue->port_id; 988 queueid = rx_queue->queue_id; 989 990 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 991 MAX_PKT_BURST); 992 993 stats[lcore_id].nb_rx_processed += nb_rx; 994 if (unlikely(nb_rx == 0)) { 995 /** 996 * no packet received from rx queue, try to 997 * sleep for a while forcing CPU enter deeper 998 * C states. 999 */ 1000 rx_queue->zero_rx_packet_count++; 1001 1002 if (rx_queue->zero_rx_packet_count <= 1003 MIN_ZERO_POLL_COUNT) 1004 continue; 1005 1006 rx_queue->idle_hint = power_idle_heuristic( 1007 rx_queue->zero_rx_packet_count); 1008 lcore_rx_idle_count++; 1009 } else { 1010 rx_queue->zero_rx_packet_count = 0; 1011 } 1012 1013 /* Prefetch first packets */ 1014 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1015 rte_prefetch0(rte_pktmbuf_mtod( 1016 pkts_burst[j], void *)); 1017 } 1018 1019 /* Prefetch and forward already prefetched packets */ 1020 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1021 rte_prefetch0(rte_pktmbuf_mtod( 1022 pkts_burst[j + PREFETCH_OFFSET], 1023 void *)); 1024 l3fwd_simple_forward( 1025 pkts_burst[j], portid, qconf); 1026 } 1027 1028 /* Forward remaining prefetched packets */ 1029 for (; j < nb_rx; j++) { 1030 l3fwd_simple_forward( 1031 pkts_burst[j], portid, qconf); 1032 } 1033 } 1034 1035 if (unlikely(lcore_rx_idle_count == qconf->n_rx_queue)) { 1036 /** 1037 * All Rx queues empty in recent consecutive polls, 1038 * sleep in a conservative manner, meaning sleep as 1039 * less as possible. 
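			 * The lcore picks the smallest idle hint across its Rx queues
			 * below: a short hint (< SUSPEND_THRESHOLD) is spent busy-waiting
			 * in rte_delay_us(), a longer one arms the Rx interrupts and
			 * blocks in sleep_until_rx_interrupt() until traffic (or the
			 * 10 ms epoll timeout) wakes the lcore up.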
1040 */ 1041 for (i = 1, 1042 lcore_idle_hint = qconf->rx_queue_list[0].idle_hint; 1043 i < qconf->n_rx_queue; ++i) { 1044 rx_queue = &(qconf->rx_queue_list[i]); 1045 if (rx_queue->idle_hint < lcore_idle_hint) 1046 lcore_idle_hint = rx_queue->idle_hint; 1047 } 1048 1049 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1050 /** 1051 * execute "pause" instruction to avoid context 1052 * switch which generally take hundred of 1053 * microseconds for short sleep. 1054 */ 1055 rte_delay_us(lcore_idle_hint); 1056 else { 1057 /* suspend until rx interrupt triggers */ 1058 if (intr_en) { 1059 turn_on_off_intr(qconf, 1); 1060 sleep_until_rx_interrupt( 1061 qconf->n_rx_queue, 1062 lcore_id); 1063 turn_on_off_intr(qconf, 0); 1064 /** 1065 * start receiving packets immediately 1066 */ 1067 if (likely(!is_done())) 1068 goto start_rx; 1069 } 1070 } 1071 stats[lcore_id].sleep_time += lcore_idle_hint; 1072 } 1073 } 1074 1075 return 0; 1076 } 1077 /* >8 End of main processing loop. */ 1078 1079 /* main processing loop */ 1080 static int 1081 main_telemetry_loop(__rte_unused void *dummy) 1082 { 1083 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1084 unsigned int lcore_id; 1085 uint64_t prev_tsc, diff_tsc, cur_tsc, prev_tel_tsc; 1086 int i, j, nb_rx; 1087 uint8_t queueid; 1088 uint16_t portid; 1089 struct lcore_conf *qconf; 1090 struct lcore_rx_queue *rx_queue; 1091 uint64_t ep_nep[2] = {0}, fp_nfp[2] = {0}; 1092 uint64_t poll_count; 1093 enum busy_rate br; 1094 1095 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 1096 US_PER_S * BURST_TX_DRAIN_US; 1097 1098 poll_count = 0; 1099 prev_tsc = 0; 1100 prev_tel_tsc = 0; 1101 1102 lcore_id = rte_lcore_id(); 1103 qconf = &lcore_conf[lcore_id]; 1104 1105 if (qconf->n_rx_queue == 0) { 1106 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1107 lcore_id); 1108 return 0; 1109 } 1110 1111 RTE_LOG(INFO, L3FWD_POWER, "entering main telemetry loop on lcore %u\n", 1112 lcore_id); 1113 1114 for (i = 0; i < qconf->n_rx_queue; i++) { 1115 portid = qconf->rx_queue_list[i].port_id; 1116 queueid = qconf->rx_queue_list[i].queue_id; 1117 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1118 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1119 } 1120 1121 while (!is_done()) { 1122 1123 cur_tsc = rte_rdtsc(); 1124 /* 1125 * TX burst queue drain 1126 */ 1127 diff_tsc = cur_tsc - prev_tsc; 1128 if (unlikely(diff_tsc > drain_tsc)) { 1129 for (i = 0; i < qconf->n_tx_port; ++i) { 1130 portid = qconf->tx_port_id[i]; 1131 rte_eth_tx_buffer_flush(portid, 1132 qconf->tx_queue_id[portid], 1133 qconf->tx_buffer[portid]); 1134 } 1135 prev_tsc = cur_tsc; 1136 } 1137 1138 /* 1139 * Read packet from RX queues 1140 */ 1141 for (i = 0; i < qconf->n_rx_queue; ++i) { 1142 rx_queue = &(qconf->rx_queue_list[i]); 1143 portid = rx_queue->port_id; 1144 queueid = rx_queue->queue_id; 1145 1146 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1147 MAX_PKT_BURST); 1148 ep_nep[nb_rx == 0]++; 1149 fp_nfp[nb_rx == MAX_PKT_BURST]++; 1150 poll_count++; 1151 if (unlikely(nb_rx == 0)) 1152 continue; 1153 1154 /* Prefetch first packets */ 1155 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1156 rte_prefetch0(rte_pktmbuf_mtod( 1157 pkts_burst[j], void *)); 1158 } 1159 1160 /* Prefetch and forward already prefetched packets */ 1161 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1162 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1163 j + PREFETCH_OFFSET], void *)); 1164 l3fwd_simple_forward(pkts_burst[j], portid, 1165 qconf); 1166 } 1167 1168 /* Forward remaining prefetched packets */ 1169 
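			/*
			 * The loop above prefetched packet j + PREFETCH_OFFSET while
			 * forwarding packet j, so the last PREFETCH_OFFSET packets should
			 * already be warm in cache and only need to be forwarded here.
			 */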
for (; j < nb_rx; j++) { 1170 l3fwd_simple_forward(pkts_burst[j], portid, 1171 qconf); 1172 } 1173 } 1174 if (unlikely(poll_count >= DEFAULT_COUNT)) { 1175 diff_tsc = cur_tsc - prev_tel_tsc; 1176 if (diff_tsc >= MAX_CYCLES) { 1177 br = FULL; 1178 } else if (diff_tsc > MIN_CYCLES && 1179 diff_tsc < MAX_CYCLES) { 1180 br = (diff_tsc * 100) / MAX_CYCLES; 1181 } else { 1182 br = ZERO; 1183 } 1184 poll_count = 0; 1185 prev_tel_tsc = cur_tsc; 1186 /* update stats for telemetry */ 1187 rte_spinlock_lock(&stats[lcore_id].telemetry_lock); 1188 stats[lcore_id].ep_nep[0] = ep_nep[0]; 1189 stats[lcore_id].ep_nep[1] = ep_nep[1]; 1190 stats[lcore_id].fp_nfp[0] = fp_nfp[0]; 1191 stats[lcore_id].fp_nfp[1] = fp_nfp[1]; 1192 stats[lcore_id].br = br; 1193 rte_spinlock_unlock(&stats[lcore_id].telemetry_lock); 1194 } 1195 } 1196 1197 return 0; 1198 } 1199 /* main processing loop */ 1200 static int 1201 main_empty_poll_loop(__rte_unused void *dummy) 1202 { 1203 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1204 unsigned int lcore_id; 1205 uint64_t prev_tsc, diff_tsc, cur_tsc; 1206 int i, j, nb_rx; 1207 uint8_t queueid; 1208 uint16_t portid; 1209 struct lcore_conf *qconf; 1210 struct lcore_rx_queue *rx_queue; 1211 1212 const uint64_t drain_tsc = 1213 (rte_get_tsc_hz() + US_PER_S - 1) / 1214 US_PER_S * BURST_TX_DRAIN_US; 1215 1216 prev_tsc = 0; 1217 1218 lcore_id = rte_lcore_id(); 1219 qconf = &lcore_conf[lcore_id]; 1220 1221 if (qconf->n_rx_queue == 0) { 1222 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1223 lcore_id); 1224 return 0; 1225 } 1226 1227 for (i = 0; i < qconf->n_rx_queue; i++) { 1228 portid = qconf->rx_queue_list[i].port_id; 1229 queueid = qconf->rx_queue_list[i].queue_id; 1230 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1231 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1232 } 1233 1234 while (!is_done()) { 1235 stats[lcore_id].nb_iteration_looped++; 1236 1237 cur_tsc = rte_rdtsc(); 1238 /* 1239 * TX burst queue drain 1240 */ 1241 diff_tsc = cur_tsc - prev_tsc; 1242 if (unlikely(diff_tsc > drain_tsc)) { 1243 for (i = 0; i < qconf->n_tx_port; ++i) { 1244 portid = qconf->tx_port_id[i]; 1245 rte_eth_tx_buffer_flush(portid, 1246 qconf->tx_queue_id[portid], 1247 qconf->tx_buffer[portid]); 1248 } 1249 prev_tsc = cur_tsc; 1250 } 1251 1252 /* 1253 * Read packet from RX queues 1254 */ 1255 for (i = 0; i < qconf->n_rx_queue; ++i) { 1256 rx_queue = &(qconf->rx_queue_list[i]); 1257 rx_queue->idle_hint = 0; 1258 portid = rx_queue->port_id; 1259 queueid = rx_queue->queue_id; 1260 1261 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1262 MAX_PKT_BURST); 1263 1264 stats[lcore_id].nb_rx_processed += nb_rx; 1265 1266 if (nb_rx == 0) { 1267 1268 rte_power_empty_poll_stat_update(lcore_id); 1269 1270 continue; 1271 } else { 1272 rte_power_poll_stat_update(lcore_id, nb_rx); 1273 } 1274 1275 1276 /* Prefetch first packets */ 1277 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1278 rte_prefetch0(rte_pktmbuf_mtod( 1279 pkts_burst[j], void *)); 1280 } 1281 1282 /* Prefetch and forward already prefetched packets */ 1283 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1284 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1285 j + PREFETCH_OFFSET], 1286 void *)); 1287 l3fwd_simple_forward(pkts_burst[j], portid, 1288 qconf); 1289 } 1290 1291 /* Forward remaining prefetched packets */ 1292 for (; j < nb_rx; j++) { 1293 l3fwd_simple_forward(pkts_burst[j], portid, 1294 qconf); 1295 } 1296 1297 } 1298 1299 } 1300 1301 return 0; 1302 } 1303 /* main processing loop */ 1304 static int 1305 
main_legacy_loop(__rte_unused void *dummy) 1306 { 1307 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1308 unsigned lcore_id; 1309 uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz; 1310 uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power; 1311 int i, j, nb_rx; 1312 uint8_t queueid; 1313 uint16_t portid; 1314 struct lcore_conf *qconf; 1315 struct lcore_rx_queue *rx_queue; 1316 enum freq_scale_hint_t lcore_scaleup_hint; 1317 uint32_t lcore_rx_idle_count = 0; 1318 uint32_t lcore_idle_hint = 0; 1319 int intr_en = 0; 1320 1321 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; 1322 1323 prev_tsc = 0; 1324 hz = rte_get_timer_hz(); 1325 tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND; 1326 1327 lcore_id = rte_lcore_id(); 1328 qconf = &lcore_conf[lcore_id]; 1329 1330 if (qconf->n_rx_queue == 0) { 1331 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); 1332 return 0; 1333 } 1334 1335 RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id); 1336 1337 for (i = 0; i < qconf->n_rx_queue; i++) { 1338 portid = qconf->rx_queue_list[i].port_id; 1339 queueid = qconf->rx_queue_list[i].queue_id; 1340 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1341 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1342 } 1343 1344 /* add into event wait list */ 1345 if (event_register(qconf) == 0) 1346 intr_en = 1; 1347 else 1348 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 1349 1350 while (!is_done()) { 1351 stats[lcore_id].nb_iteration_looped++; 1352 1353 cur_tsc = rte_rdtsc(); 1354 cur_tsc_power = cur_tsc; 1355 1356 /* 1357 * TX burst queue drain 1358 */ 1359 diff_tsc = cur_tsc - prev_tsc; 1360 if (unlikely(diff_tsc > drain_tsc)) { 1361 for (i = 0; i < qconf->n_tx_port; ++i) { 1362 portid = qconf->tx_port_id[i]; 1363 rte_eth_tx_buffer_flush(portid, 1364 qconf->tx_queue_id[portid], 1365 qconf->tx_buffer[portid]); 1366 } 1367 prev_tsc = cur_tsc; 1368 } 1369 1370 diff_tsc_power = cur_tsc_power - prev_tsc_power; 1371 if (diff_tsc_power > tim_res_tsc) { 1372 rte_timer_manage(); 1373 prev_tsc_power = cur_tsc_power; 1374 } 1375 1376 start_rx: 1377 /* 1378 * Read packet from RX queues 1379 */ 1380 lcore_scaleup_hint = FREQ_CURRENT; 1381 lcore_rx_idle_count = 0; 1382 for (i = 0; i < qconf->n_rx_queue; ++i) { 1383 rx_queue = &(qconf->rx_queue_list[i]); 1384 rx_queue->idle_hint = 0; 1385 portid = rx_queue->port_id; 1386 queueid = rx_queue->queue_id; 1387 1388 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1389 MAX_PKT_BURST); 1390 1391 stats[lcore_id].nb_rx_processed += nb_rx; 1392 if (unlikely(nb_rx == 0)) { 1393 /** 1394 * no packet received from rx queue, try to 1395 * sleep for a while forcing CPU enter deeper 1396 * C states. 1397 */ 1398 rx_queue->zero_rx_packet_count++; 1399 1400 if (rx_queue->zero_rx_packet_count <= 1401 MIN_ZERO_POLL_COUNT) 1402 continue; 1403 1404 rx_queue->idle_hint = power_idle_heuristic(\ 1405 rx_queue->zero_rx_packet_count); 1406 lcore_rx_idle_count++; 1407 } else { 1408 rx_queue->zero_rx_packet_count = 0; 1409 1410 /** 1411 * do not scale up frequency immediately as 1412 * user to kernel space communication is costly 1413 * which might impact packet I/O for received 1414 * packets. 
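			 * Instead, power_freq_scaleup_heuristic() turns the current Rx
			 * queue occupancy (rte_eth_rx_queue_count()) into a per-queue
			 * hint: a backlog of more than three bursts requests FREQ_HIGHEST
			 * straight away, smaller backlogs only bump a trend counter that
			 * must exceed FREQ_UP_THRESHOLD before FREQ_HIGHER is requested.
			 * The largest hint across the lcore's queues is applied after the
			 * Rx loop below.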
1415 */ 1416 rx_queue->freq_up_hint = 1417 power_freq_scaleup_heuristic(lcore_id, 1418 portid, queueid); 1419 } 1420 1421 /* Prefetch first packets */ 1422 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1423 rte_prefetch0(rte_pktmbuf_mtod( 1424 pkts_burst[j], void *)); 1425 } 1426 1427 /* Prefetch and forward already prefetched packets */ 1428 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1429 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1430 j + PREFETCH_OFFSET], void *)); 1431 l3fwd_simple_forward(pkts_burst[j], portid, 1432 qconf); 1433 } 1434 1435 /* Forward remaining prefetched packets */ 1436 for (; j < nb_rx; j++) { 1437 l3fwd_simple_forward(pkts_burst[j], portid, 1438 qconf); 1439 } 1440 } 1441 1442 if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) { 1443 for (i = 1, lcore_scaleup_hint = 1444 qconf->rx_queue_list[0].freq_up_hint; 1445 i < qconf->n_rx_queue; ++i) { 1446 rx_queue = &(qconf->rx_queue_list[i]); 1447 if (rx_queue->freq_up_hint > 1448 lcore_scaleup_hint) 1449 lcore_scaleup_hint = 1450 rx_queue->freq_up_hint; 1451 } 1452 1453 if (lcore_scaleup_hint == FREQ_HIGHEST) { 1454 if (rte_power_freq_max) 1455 rte_power_freq_max(lcore_id); 1456 } else if (lcore_scaleup_hint == FREQ_HIGHER) { 1457 if (rte_power_freq_up) 1458 rte_power_freq_up(lcore_id); 1459 } 1460 } else { 1461 /** 1462 * All Rx queues empty in recent consecutive polls, 1463 * sleep in a conservative manner, meaning sleep as 1464 * less as possible. 1465 */ 1466 for (i = 1, lcore_idle_hint = 1467 qconf->rx_queue_list[0].idle_hint; 1468 i < qconf->n_rx_queue; ++i) { 1469 rx_queue = &(qconf->rx_queue_list[i]); 1470 if (rx_queue->idle_hint < lcore_idle_hint) 1471 lcore_idle_hint = rx_queue->idle_hint; 1472 } 1473 1474 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1475 /** 1476 * execute "pause" instruction to avoid context 1477 * switch which generally take hundred of 1478 * microseconds for short sleep. 
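			 * Either way the chosen hint is added to stats[].sleep_time
			 * further down; once the 100 ms power_timer_cb() window sees the
			 * lcore idle for more than SCALING_DOWN_TIME_RATIO_THRESHOLD (25%)
			 * of the interval, it steps the frequency down one notch via
			 * rte_power_freq_down().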
1479 */ 1480 rte_delay_us(lcore_idle_hint); 1481 else { 1482 /* suspend until rx interrupt triggers */ 1483 if (intr_en) { 1484 turn_on_off_intr(qconf, 1); 1485 sleep_until_rx_interrupt( 1486 qconf->n_rx_queue, 1487 lcore_id); 1488 turn_on_off_intr(qconf, 0); 1489 /** 1490 * start receiving packets immediately 1491 */ 1492 if (likely(!is_done())) 1493 goto start_rx; 1494 } 1495 } 1496 stats[lcore_id].sleep_time += lcore_idle_hint; 1497 } 1498 } 1499 1500 return 0; 1501 } 1502 1503 static int 1504 check_lcore_params(void) 1505 { 1506 uint8_t queue, lcore; 1507 uint16_t i; 1508 int socketid; 1509 1510 for (i = 0; i < nb_lcore_params; ++i) { 1511 queue = lcore_params[i].queue_id; 1512 if (queue >= MAX_RX_QUEUE_PER_PORT) { 1513 printf("invalid queue number: %hhu\n", queue); 1514 return -1; 1515 } 1516 lcore = lcore_params[i].lcore_id; 1517 if (!rte_lcore_is_enabled(lcore)) { 1518 printf("error: lcore %hhu is not enabled in lcore " 1519 "mask\n", lcore); 1520 return -1; 1521 } 1522 if ((socketid = rte_lcore_to_socket_id(lcore) != 0) && 1523 (numa_on == 0)) { 1524 printf("warning: lcore %hhu is on socket %d with numa " 1525 "off\n", lcore, socketid); 1526 } 1527 if (app_mode == APP_MODE_TELEMETRY && lcore == rte_lcore_id()) { 1528 printf("cannot enable main core %d in config for telemetry mode\n", 1529 rte_lcore_id()); 1530 return -1; 1531 } 1532 } 1533 return 0; 1534 } 1535 1536 static int 1537 check_port_config(void) 1538 { 1539 unsigned portid; 1540 uint16_t i; 1541 1542 for (i = 0; i < nb_lcore_params; ++i) { 1543 portid = lcore_params[i].port_id; 1544 if ((enabled_port_mask & (1 << portid)) == 0) { 1545 printf("port %u is not enabled in port mask\n", 1546 portid); 1547 return -1; 1548 } 1549 if (!rte_eth_dev_is_valid_port(portid)) { 1550 printf("port %u is not present on the board\n", 1551 portid); 1552 return -1; 1553 } 1554 } 1555 return 0; 1556 } 1557 1558 static uint8_t 1559 get_port_n_rx_queues(const uint16_t port) 1560 { 1561 int queue = -1; 1562 uint16_t i; 1563 1564 for (i = 0; i < nb_lcore_params; ++i) { 1565 if (lcore_params[i].port_id == port && 1566 lcore_params[i].queue_id > queue) 1567 queue = lcore_params[i].queue_id; 1568 } 1569 return (uint8_t)(++queue); 1570 } 1571 1572 static int 1573 init_lcore_rx_queues(void) 1574 { 1575 uint16_t i, nb_rx_queue; 1576 uint8_t lcore; 1577 1578 for (i = 0; i < nb_lcore_params; ++i) { 1579 lcore = lcore_params[i].lcore_id; 1580 nb_rx_queue = lcore_conf[lcore].n_rx_queue; 1581 if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { 1582 printf("error: too many queues (%u) for lcore: %u\n", 1583 (unsigned)nb_rx_queue + 1, (unsigned)lcore); 1584 return -1; 1585 } else { 1586 lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = 1587 lcore_params[i].port_id; 1588 lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = 1589 lcore_params[i].queue_id; 1590 lcore_conf[lcore].n_rx_queue++; 1591 } 1592 } 1593 return 0; 1594 } 1595 1596 /* display usage */ 1597 static void 1598 print_usage(const char *prgname) 1599 { 1600 printf ("%s [EAL options] -- -p PORTMASK -P" 1601 " [--config (port,queue,lcore)[,(port,queue,lcore]]" 1602 " [--high-perf-cores CORELIST" 1603 " [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index]]" 1604 " [--enable-jumbo [--max-pkt-len PKTLEN]]\n" 1605 " -p PORTMASK: hexadecimal bitmask of ports to configure\n" 1606 " -P : enable promiscuous mode\n" 1607 " --config (port,queue,lcore): rx queues configuration\n" 1608 " --high-perf-cores CORELIST: list of high performance cores\n" 1609 " --perf-config: similar as 
config, cores specified as indices" 1610 " for bins containing high or regular performance cores\n" 1611 " --no-numa: optional, disable numa awareness\n" 1612 " --enable-jumbo: enable jumbo frame" 1613 " which max packet len is PKTLEN in decimal (64-9600)\n" 1614 " --parse-ptype: parse packet type by software\n" 1615 " --legacy: use legacy interrupt-based scaling\n" 1616 " --empty-poll: enable empty poll detection" 1617 " follow (training_flag, high_threshold, med_threshold)\n" 1618 " --telemetry: enable telemetry mode, to update" 1619 " empty polls, full polls, and core busyness to telemetry\n" 1620 " --interrupt-only: enable interrupt-only mode\n" 1621 " --pmd-mgmt MODE: enable PMD power management mode. " 1622 "Currently supported modes: baseline, monitor, pause, scale\n", 1623 prgname); 1624 } 1625 1626 static int parse_max_pkt_len(const char *pktlen) 1627 { 1628 char *end = NULL; 1629 unsigned long len; 1630 1631 /* parse decimal string */ 1632 len = strtoul(pktlen, &end, 10); 1633 if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0')) 1634 return -1; 1635 1636 if (len == 0) 1637 return -1; 1638 1639 return len; 1640 } 1641 1642 static int 1643 parse_portmask(const char *portmask) 1644 { 1645 char *end = NULL; 1646 unsigned long pm; 1647 1648 /* parse hexadecimal string */ 1649 pm = strtoul(portmask, &end, 16); 1650 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 1651 return 0; 1652 1653 return pm; 1654 } 1655 1656 static int 1657 parse_config(const char *q_arg) 1658 { 1659 char s[256]; 1660 const char *p, *p0 = q_arg; 1661 char *end; 1662 enum fieldnames { 1663 FLD_PORT = 0, 1664 FLD_QUEUE, 1665 FLD_LCORE, 1666 _NUM_FLD 1667 }; 1668 unsigned long int_fld[_NUM_FLD]; 1669 char *str_fld[_NUM_FLD]; 1670 int i; 1671 unsigned size; 1672 1673 nb_lcore_params = 0; 1674 1675 while ((p = strchr(p0,'(')) != NULL) { 1676 ++p; 1677 if((p0 = strchr(p,')')) == NULL) 1678 return -1; 1679 1680 size = p0 - p; 1681 if(size >= sizeof(s)) 1682 return -1; 1683 1684 snprintf(s, sizeof(s), "%.*s", size, p); 1685 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != 1686 _NUM_FLD) 1687 return -1; 1688 for (i = 0; i < _NUM_FLD; i++){ 1689 errno = 0; 1690 int_fld[i] = strtoul(str_fld[i], &end, 0); 1691 if (errno != 0 || end == str_fld[i] || int_fld[i] > 1692 255) 1693 return -1; 1694 } 1695 if (nb_lcore_params >= MAX_LCORE_PARAMS) { 1696 printf("exceeded max number of lcore params: %hu\n", 1697 nb_lcore_params); 1698 return -1; 1699 } 1700 lcore_params_array[nb_lcore_params].port_id = 1701 (uint8_t)int_fld[FLD_PORT]; 1702 lcore_params_array[nb_lcore_params].queue_id = 1703 (uint8_t)int_fld[FLD_QUEUE]; 1704 lcore_params_array[nb_lcore_params].lcore_id = 1705 (uint8_t)int_fld[FLD_LCORE]; 1706 ++nb_lcore_params; 1707 } 1708 lcore_params = lcore_params_array; 1709 1710 return 0; 1711 } 1712 1713 static int 1714 parse_pmd_mgmt_config(const char *name) 1715 { 1716 #define PMD_MGMT_MONITOR "monitor" 1717 #define PMD_MGMT_PAUSE "pause" 1718 #define PMD_MGMT_SCALE "scale" 1719 #define PMD_MGMT_BASELINE "baseline" 1720 1721 if (strncmp(PMD_MGMT_MONITOR, name, sizeof(PMD_MGMT_MONITOR)) == 0) { 1722 pmgmt_type = RTE_POWER_MGMT_TYPE_MONITOR; 1723 return 0; 1724 } 1725 1726 if (strncmp(PMD_MGMT_PAUSE, name, sizeof(PMD_MGMT_PAUSE)) == 0) { 1727 pmgmt_type = RTE_POWER_MGMT_TYPE_PAUSE; 1728 return 0; 1729 } 1730 1731 if (strncmp(PMD_MGMT_SCALE, name, sizeof(PMD_MGMT_SCALE)) == 0) { 1732 pmgmt_type = RTE_POWER_MGMT_TYPE_SCALE; 1733 return 0; 1734 } 1735 if (strncmp(PMD_MGMT_BASELINE, name, 
sizeof(PMD_MGMT_BASELINE)) == 0) { 1736 baseline_enabled = true; 1737 return 0; 1738 } 1739 /* unknown PMD power management mode */ 1740 return -1; 1741 } 1742 1743 static int 1744 parse_ep_config(const char *q_arg) 1745 { 1746 char s[256]; 1747 const char *p = q_arg; 1748 char *end; 1749 int num_arg; 1750 1751 char *str_fld[3]; 1752 1753 int training_flag; 1754 int med_edpi; 1755 int hgh_edpi; 1756 1757 ep_med_edpi = EMPTY_POLL_MED_THRESHOLD; 1758 ep_hgh_edpi = EMPTY_POLL_HGH_THRESHOLD; 1759 1760 strlcpy(s, p, sizeof(s)); 1761 1762 num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ','); 1763 1764 empty_poll_train = false; 1765 1766 if (num_arg == 0) 1767 return 0; 1768 1769 if (num_arg == 3) { 1770 1771 training_flag = strtoul(str_fld[0], &end, 0); 1772 med_edpi = strtoul(str_fld[1], &end, 0); 1773 hgh_edpi = strtoul(str_fld[2], &end, 0); 1774 1775 if (training_flag == 1) 1776 empty_poll_train = true; 1777 1778 if (med_edpi > 0) 1779 ep_med_edpi = med_edpi; 1780 1781 if (hgh_edpi > 0) 1782 ep_hgh_edpi = hgh_edpi; 1783 1784 } else { 1785 1786 return -1; 1787 } 1788 1789 return 0; 1790 1791 } 1792 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" 1793 #define CMD_LINE_OPT_LEGACY "legacy" 1794 #define CMD_LINE_OPT_EMPTY_POLL "empty-poll" 1795 #define CMD_LINE_OPT_INTERRUPT_ONLY "interrupt-only" 1796 #define CMD_LINE_OPT_TELEMETRY "telemetry" 1797 #define CMD_LINE_OPT_PMD_MGMT "pmd-mgmt" 1798 1799 /* Parse the argument given in the command line of the application */ 1800 static int 1801 parse_args(int argc, char **argv) 1802 { 1803 int opt, ret; 1804 char **argvopt; 1805 int option_index; 1806 uint32_t limit; 1807 char *prgname = argv[0]; 1808 static struct option lgopts[] = { 1809 {"config", 1, 0, 0}, 1810 {"perf-config", 1, 0, 0}, 1811 {"high-perf-cores", 1, 0, 0}, 1812 {"no-numa", 0, 0, 0}, 1813 {"enable-jumbo", 0, 0, 0}, 1814 {CMD_LINE_OPT_EMPTY_POLL, 1, 0, 0}, 1815 {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, 1816 {CMD_LINE_OPT_LEGACY, 0, 0, 0}, 1817 {CMD_LINE_OPT_TELEMETRY, 0, 0, 0}, 1818 {CMD_LINE_OPT_INTERRUPT_ONLY, 0, 0, 0}, 1819 {CMD_LINE_OPT_PMD_MGMT, 1, 0, 0}, 1820 {NULL, 0, 0, 0} 1821 }; 1822 1823 argvopt = argv; 1824 1825 while ((opt = getopt_long(argc, argvopt, "p:l:m:h:P", 1826 lgopts, &option_index)) != EOF) { 1827 1828 switch (opt) { 1829 /* portmask */ 1830 case 'p': 1831 enabled_port_mask = parse_portmask(optarg); 1832 if (enabled_port_mask == 0) { 1833 printf("invalid portmask\n"); 1834 print_usage(prgname); 1835 return -1; 1836 } 1837 break; 1838 case 'P': 1839 printf("Promiscuous mode selected\n"); 1840 promiscuous_on = 1; 1841 break; 1842 case 'l': 1843 limit = parse_max_pkt_len(optarg); 1844 freq_tlb[LOW] = limit; 1845 break; 1846 case 'm': 1847 limit = parse_max_pkt_len(optarg); 1848 freq_tlb[MED] = limit; 1849 break; 1850 case 'h': 1851 limit = parse_max_pkt_len(optarg); 1852 freq_tlb[HGH] = limit; 1853 break; 1854 /* long options */ 1855 case 0: 1856 if (!strncmp(lgopts[option_index].name, "config", 6)) { 1857 ret = parse_config(optarg); 1858 if (ret) { 1859 printf("invalid config\n"); 1860 print_usage(prgname); 1861 return -1; 1862 } 1863 } 1864 1865 if (!strncmp(lgopts[option_index].name, 1866 "perf-config", 11)) { 1867 ret = parse_perf_config(optarg); 1868 if (ret) { 1869 printf("invalid perf-config\n"); 1870 print_usage(prgname); 1871 return -1; 1872 } 1873 } 1874 1875 if (!strncmp(lgopts[option_index].name, 1876 "high-perf-cores", 15)) { 1877 ret = parse_perf_core_list(optarg); 1878 if (ret) { 1879 printf("invalid high-perf-cores\n"); 1880 print_usage(prgname); 1881 
return -1; 1882 } 1883 } 1884 1885 if (!strncmp(lgopts[option_index].name, 1886 "no-numa", 7)) { 1887 printf("numa is disabled \n"); 1888 numa_on = 0; 1889 } 1890 1891 if (!strncmp(lgopts[option_index].name, 1892 CMD_LINE_OPT_LEGACY, 1893 sizeof(CMD_LINE_OPT_LEGACY))) { 1894 if (app_mode != APP_MODE_DEFAULT) { 1895 printf(" legacy mode is mutually exclusive with other modes\n"); 1896 return -1; 1897 } 1898 app_mode = APP_MODE_LEGACY; 1899 printf("legacy mode is enabled\n"); 1900 } 1901 1902 if (!strncmp(lgopts[option_index].name, 1903 CMD_LINE_OPT_EMPTY_POLL, 10)) { 1904 if (app_mode != APP_MODE_DEFAULT) { 1905 printf(" empty-poll mode is mutually exclusive with other modes\n"); 1906 return -1; 1907 } 1908 app_mode = APP_MODE_EMPTY_POLL; 1909 ret = parse_ep_config(optarg); 1910 1911 if (ret) { 1912 printf("invalid empty poll config\n"); 1913 print_usage(prgname); 1914 return -1; 1915 } 1916 printf("empty-poll is enabled\n"); 1917 } 1918 1919 if (!strncmp(lgopts[option_index].name, 1920 CMD_LINE_OPT_TELEMETRY, 1921 sizeof(CMD_LINE_OPT_TELEMETRY))) { 1922 if (app_mode != APP_MODE_DEFAULT) { 1923 printf(" telemetry mode is mutually exclusive with other modes\n"); 1924 return -1; 1925 } 1926 app_mode = APP_MODE_TELEMETRY; 1927 printf("telemetry mode is enabled\n"); 1928 } 1929 1930 if (!strncmp(lgopts[option_index].name, 1931 CMD_LINE_OPT_PMD_MGMT, 1932 sizeof(CMD_LINE_OPT_PMD_MGMT))) { 1933 if (app_mode != APP_MODE_DEFAULT) { 1934 printf(" power mgmt mode is mutually exclusive with other modes\n"); 1935 return -1; 1936 } 1937 if (parse_pmd_mgmt_config(optarg) < 0) { 1938 printf(" Invalid PMD power management mode: %s\n", 1939 optarg); 1940 return -1; 1941 } 1942 app_mode = APP_MODE_PMD_MGMT; 1943 printf("PMD power mgmt mode is enabled\n"); 1944 } 1945 if (!strncmp(lgopts[option_index].name, 1946 CMD_LINE_OPT_INTERRUPT_ONLY, 1947 sizeof(CMD_LINE_OPT_INTERRUPT_ONLY))) { 1948 if (app_mode != APP_MODE_DEFAULT) { 1949 printf(" interrupt-only mode is mutually exclusive with other modes\n"); 1950 return -1; 1951 } 1952 app_mode = APP_MODE_INTERRUPT; 1953 printf("interrupt-only mode is enabled\n"); 1954 } 1955 1956 if (!strncmp(lgopts[option_index].name, 1957 "enable-jumbo", 12)) { 1958 struct option lenopts = 1959 {"max-pkt-len", required_argument, \ 1960 0, 0}; 1961 1962 printf("jumbo frame is enabled \n"); 1963 port_conf.rxmode.offloads |= 1964 DEV_RX_OFFLOAD_JUMBO_FRAME; 1965 port_conf.txmode.offloads |= 1966 DEV_TX_OFFLOAD_MULTI_SEGS; 1967 1968 /** 1969 * if no max-pkt-len set, use the default value 1970 * RTE_ETHER_MAX_LEN 1971 */ 1972 if (0 == getopt_long(argc, argvopt, "", 1973 &lenopts, &option_index)) { 1974 ret = parse_max_pkt_len(optarg); 1975 if ((ret < 64) || 1976 (ret > MAX_JUMBO_PKT_LEN)){ 1977 printf("invalid packet " 1978 "length\n"); 1979 print_usage(prgname); 1980 return -1; 1981 } 1982 port_conf.rxmode.max_rx_pkt_len = ret; 1983 } 1984 printf("set jumbo frame " 1985 "max packet length to %u\n", 1986 (unsigned int)port_conf.rxmode.max_rx_pkt_len); 1987 } 1988 1989 if (!strncmp(lgopts[option_index].name, 1990 CMD_LINE_OPT_PARSE_PTYPE, 1991 sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { 1992 printf("soft parse-ptype is enabled\n"); 1993 parse_ptype = 1; 1994 } 1995 1996 break; 1997 1998 default: 1999 print_usage(prgname); 2000 return -1; 2001 } 2002 } 2003 2004 if (optind >= 0) 2005 argv[optind-1] = prgname; 2006 2007 ret = optind-1; 2008 optind = 1; /* reset getopt lib */ 2009 return ret; 2010 } 2011 2012 static void 2013 print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr) 
2014 { 2015 char buf[RTE_ETHER_ADDR_FMT_SIZE]; 2016 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr); 2017 printf("%s%s", name, buf); 2018 } 2019 2020 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2021 static void 2022 setup_hash(int socketid) 2023 { 2024 struct rte_hash_parameters ipv4_l3fwd_hash_params = { 2025 .name = NULL, 2026 .entries = L3FWD_HASH_ENTRIES, 2027 .key_len = sizeof(struct ipv4_5tuple), 2028 .hash_func = DEFAULT_HASH_FUNC, 2029 .hash_func_init_val = 0, 2030 }; 2031 2032 struct rte_hash_parameters ipv6_l3fwd_hash_params = { 2033 .name = NULL, 2034 .entries = L3FWD_HASH_ENTRIES, 2035 .key_len = sizeof(struct ipv6_5tuple), 2036 .hash_func = DEFAULT_HASH_FUNC, 2037 .hash_func_init_val = 0, 2038 }; 2039 2040 unsigned i; 2041 int ret; 2042 char s[64]; 2043 2044 /* create ipv4 hash */ 2045 snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); 2046 ipv4_l3fwd_hash_params.name = s; 2047 ipv4_l3fwd_hash_params.socket_id = socketid; 2048 ipv4_l3fwd_lookup_struct[socketid] = 2049 rte_hash_create(&ipv4_l3fwd_hash_params); 2050 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 2051 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 2052 "socket %d\n", socketid); 2053 2054 /* create ipv6 hash */ 2055 snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); 2056 ipv6_l3fwd_hash_params.name = s; 2057 ipv6_l3fwd_hash_params.socket_id = socketid; 2058 ipv6_l3fwd_lookup_struct[socketid] = 2059 rte_hash_create(&ipv6_l3fwd_hash_params); 2060 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 2061 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 2062 "socket %d\n", socketid); 2063 2064 2065 /* populate the ipv4 hash */ 2066 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 2067 ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid], 2068 (void *) &ipv4_l3fwd_route_array[i].key); 2069 if (ret < 0) { 2070 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 2071 "l3fwd hash on socket %d\n", i, socketid); 2072 } 2073 ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out; 2074 printf("Hash: Adding key\n"); 2075 print_ipv4_key(ipv4_l3fwd_route_array[i].key); 2076 } 2077 2078 /* populate the ipv6 hash */ 2079 for (i = 0; i < RTE_DIM(ipv6_l3fwd_route_array); i++) { 2080 ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid], 2081 (void *) &ipv6_l3fwd_route_array[i].key); 2082 if (ret < 0) { 2083 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 2084 "l3fwd hash on socket %d\n", i, socketid); 2085 } 2086 ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out; 2087 printf("Hash: Adding key\n"); 2088 print_ipv6_key(ipv6_l3fwd_route_array[i].key); 2089 } 2090 } 2091 #endif 2092 2093 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 2094 static void 2095 setup_lpm(int socketid) 2096 { 2097 unsigned i; 2098 int ret; 2099 char s[64]; 2100 2101 /* create the LPM table */ 2102 struct rte_lpm_config lpm_ipv4_config; 2103 2104 lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; 2105 lpm_ipv4_config.number_tbl8s = 256; 2106 lpm_ipv4_config.flags = 0; 2107 2108 snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); 2109 ipv4_l3fwd_lookup_struct[socketid] = 2110 rte_lpm_create(s, socketid, &lpm_ipv4_config); 2111 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 2112 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 2113 " on socket %d\n", socketid); 2114 2115 /* populate the LPM table */ 2116 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 2117 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], 2118 ipv4_l3fwd_route_array[i].ip, 2119 
#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
static void
setup_lpm(int socketid)
{
	unsigned i;
	int ret;
	char s[64];

	/* create the LPM table */
	struct rte_lpm_config lpm_ipv4_config;

	lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
	lpm_ipv4_config.number_tbl8s = 256;
	lpm_ipv4_config.flags = 0;

	snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
	ipv4_l3fwd_lookup_struct[socketid] =
			rte_lpm_create(s, socketid, &lpm_ipv4_config);
	if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
				" on socket %d\n", socketid);

	/* populate the LPM table */
	for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) {
		ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
			ipv4_l3fwd_route_array[i].ip,
			ipv4_l3fwd_route_array[i].depth,
			ipv4_l3fwd_route_array[i].if_out);

		if (ret < 0) {
			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
					"l3fwd LPM table on socket %d\n",
					i, socketid);
		}

		printf("LPM: Adding route 0x%08x / %d (%d)\n",
			(unsigned)ipv4_l3fwd_route_array[i].ip,
			ipv4_l3fwd_route_array[i].depth,
			ipv4_l3fwd_route_array[i].if_out);
	}
}
#endif

static int
init_mem(unsigned nb_mbuf)
{
	struct lcore_conf *qconf;
	int socketid;
	unsigned lcore_id;
	char s[64];

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		if (numa_on)
			socketid = rte_lcore_to_socket_id(lcore_id);
		else
			socketid = 0;

		if (socketid >= NB_SOCKETS) {
			rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is "
					"out of range %d\n", socketid,
					lcore_id, NB_SOCKETS);
		}
		if (pktmbuf_pool[socketid] == NULL) {
			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
			pktmbuf_pool[socketid] =
				rte_pktmbuf_pool_create(s, nb_mbuf,
					MEMPOOL_CACHE_SIZE, 0,
					RTE_MBUF_DEFAULT_BUF_SIZE,
					socketid);
			if (pktmbuf_pool[socketid] == NULL)
				rte_exit(EXIT_FAILURE,
					"Cannot init mbuf pool on socket %d\n",
					socketid);
			else
				printf("Allocated mbuf pool on socket %d\n",
					socketid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
			setup_lpm(socketid);
#else
			setup_hash(socketid);
#endif
		}
		qconf = &lcore_conf[lcore_id];
		qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
#endif
	}
	return 0;
}

/* Check the link status of all ports in up to 9s, and print them finally */
static void
check_all_ports_link_status(uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
	uint8_t count, all_ports_up, print_flag = 0;
	uint16_t portid;
	struct rte_eth_link link;
	int ret;
	char link_status_text[RTE_ETH_LINK_MAX_STR_LEN];

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		RTE_ETH_FOREACH_DEV(portid) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			ret = rte_eth_link_get_nowait(portid, &link);
			if (ret < 0) {
				all_ports_up = 0;
				if (print_flag == 1)
					printf("Port %u link get failed: %s\n",
						portid, rte_strerror(-ret));
				continue;
			}
			/* print link status if flag set */
			if (print_flag == 1) {
				rte_eth_link_to_str(link_status_text,
					sizeof(link_status_text), &link);
				printf("Port %d %s\n", portid,
					link_status_text);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == ETH_LINK_DOWN) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}
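/*
 * check_ptype() below queries the packet types a port's PMD reports and
 * returns 1 only when the L3 types needed by the configured lookup method
 * are present (IPv4 for LPM, IPv4 and IPv6 for exact-match). It is called at
 * startup for each port when software ptype parsing (the parse-ptype option)
 * is not enabled; ports that cannot supply the needed ptypes abort
 * initialization.
 */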
static int check_ptype(uint16_t portid)
{
	int i, ret;
	int ptype_l3_ipv4 = 0;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	int ptype_l3_ipv6 = 0;
#endif
	uint32_t ptype_mask = RTE_PTYPE_L3_MASK;

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
	if (ret <= 0)
		return 0;

	uint32_t ptypes[ret];

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
	for (i = 0; i < ret; ++i) {
		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
			ptype_l3_ipv4 = 1;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
			ptype_l3_ipv6 = 1;
#endif
	}

	if (ptype_l3_ipv4 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	if (ptype_l3_ipv6 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
	if (ptype_l3_ipv4)
#else /* APP_LOOKUP_EXACT_MATCH */
	if (ptype_l3_ipv4 && ptype_l3_ipv6)
#endif
		return 1;

	return 0;
}

static int
init_power_library(void)
{
	enum power_management_env env;
	unsigned int lcore_id;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* init power management library */
		ret = rte_power_init(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library initialization failed on core %u\n",
				lcore_id);
			return ret;
		}
		/* we're not supporting the VM channel mode */
		env = rte_power_get_env();
		if (env != PM_ENV_ACPI_CPUFREQ &&
				env != PM_ENV_PSTATE_CPUFREQ) {
			RTE_LOG(ERR, POWER,
				"Only ACPI and PSTATE modes are supported\n");
			return -1;
		}
	}
	return ret;
}

static int
deinit_power_library(void)
{
	unsigned int lcore_id;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* deinit power management library */
		ret = rte_power_exit(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library deinitialization failed on core %u\n",
				lcore_id);
			return ret;
		}
	}
	return ret;
}

static void
get_current_stat_values(uint64_t *values)
{
	unsigned int lcore_id = rte_lcore_id();
	struct lcore_conf *qconf;
	uint64_t app_eps = 0, app_fps = 0, app_br = 0;
	uint64_t count = 0;

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		qconf = &lcore_conf[lcore_id];
		if (qconf->n_rx_queue == 0)
			continue;
		count++;
		rte_spinlock_lock(&stats[lcore_id].telemetry_lock);
		app_eps += stats[lcore_id].ep_nep[1];
		app_fps += stats[lcore_id].fp_nfp[1];
		app_br += stats[lcore_id].br;
		rte_spinlock_unlock(&stats[lcore_id].telemetry_lock);
	}

	if (count > 0) {
		values[0] = app_eps/count;
		values[1] = app_fps/count;
		values[2] = app_br/count;
	} else
		memset(values, 0, sizeof(uint64_t) * NUM_TELSTATS);
}

static void
update_telemetry(__rte_unused struct rte_timer *tim,
		__rte_unused void *arg)
{
	int ret;
	uint64_t values[NUM_TELSTATS] = {0};

	get_current_stat_values(values);
	ret = rte_metrics_update_values(RTE_METRICS_GLOBAL, telstats_index,
					values, RTE_DIM(values));
	if (ret < 0)
		RTE_LOG(WARNING, POWER, "failed to update metrics\n");
}
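/*
 * Telemetry callback: returns the averaged empty_poll/full_poll/busy_percent
 * values as a dictionary. It is registered in main() (telemetry mode) under
 * the "/l3fwd-power/stats" command, so while the application runs it can be
 * queried over the DPDK telemetry socket, e.g. (illustrative invocation,
 * paths depend on the installation):
 *
 *   ./usertools/dpdk-telemetry.py
 *   --> /l3fwd-power/stats
 */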
static int
handle_app_stats(const char *cmd __rte_unused,
		const char *params __rte_unused,
		struct rte_tel_data *d)
{
	uint64_t values[NUM_TELSTATS] = {0};
	uint32_t i;

	rte_tel_data_start_dict(d);
	get_current_stat_values(values);
	for (i = 0; i < NUM_TELSTATS; i++)
		rte_tel_data_add_dict_u64(d, telstats_strings[i].name,
				values[i]);
	return 0;
}

static void
telemetry_setup_timer(void)
{
	int lcore_id = rte_lcore_id();
	uint64_t hz = rte_get_timer_hz();
	uint64_t ticks;

	ticks = hz / TELEMETRY_INTERVALS_PER_SEC;
	rte_timer_reset_sync(&telemetry_timer,
			ticks,
			PERIODICAL,
			lcore_id,
			update_telemetry,
			NULL);
}

static void
empty_poll_setup_timer(void)
{
	int lcore_id = rte_lcore_id();
	uint64_t hz = rte_get_timer_hz();

	struct ep_params *ep_ptr = ep_params;

	ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND;

	rte_timer_reset_sync(&ep_ptr->timer0,
			ep_ptr->interval_ticks,
			PERIODICAL,
			lcore_id,
			rte_empty_poll_detection,
			(void *)ep_ptr);
}

static int
launch_timer(unsigned int lcore_id)
{
	int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms;

	RTE_SET_USED(lcore_id);

	if (rte_get_main_lcore() != lcore_id) {
		rte_panic("timer on lcore:%d which is not main core:%d\n",
				lcore_id,
				rte_get_main_lcore());
	}

	RTE_LOG(INFO, POWER, "Bring up the Timer\n");

	if (app_mode == APP_MODE_EMPTY_POLL)
		empty_poll_setup_timer();
	else
		telemetry_setup_timer();

	cycles_10ms = rte_get_timer_hz() / 100;

	while (!is_done()) {
		cur_tsc = rte_rdtsc();
		diff_tsc = cur_tsc - prev_tsc;
		if (diff_tsc > cycles_10ms) {
			rte_timer_manage();
			prev_tsc = cur_tsc;
			cycles_10ms = rte_get_timer_hz() / 100;
		}
	}

	RTE_LOG(INFO, POWER, "Timer_subsystem is done\n");

	return 0;
}

static int
autodetect_mode(void)
{
	RTE_LOG(NOTICE, L3FWD_POWER, "Operating mode not specified, probing frequency scaling support...\n");

	/*
	 * Empty poll and telemetry modes have to be specifically requested to
	 * be enabled, but we can auto-detect between interrupt mode with or
	 * without frequency scaling. Both ACPI and pstate can be used.
	 */
	if (rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ))
		return APP_MODE_LEGACY;
	if (rte_power_check_env_supported(PM_ENV_PSTATE_CPUFREQ))
		return APP_MODE_LEGACY;

	RTE_LOG(NOTICE, L3FWD_POWER, "Frequency scaling not supported, selecting interrupt-only mode\n");

	return APP_MODE_INTERRUPT;
}

static const char *
mode_to_str(enum appmode mode)
{
	switch (mode) {
	case APP_MODE_LEGACY:
		return "legacy";
	case APP_MODE_EMPTY_POLL:
		return "empty poll";
	case APP_MODE_TELEMETRY:
		return "telemetry";
	case APP_MODE_INTERRUPT:
		return "interrupt-only";
	case APP_MODE_PMD_MGMT:
		return "pmd mgmt";
	default:
		return "invalid";
	}
}
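/*
 * main() setup sequence: initialize EAL and parse the application options,
 * auto-detect the operating mode if none was requested, initialize the power
 * library for the legacy and empty-poll modes, then configure the enabled
 * ports, memory pools and per-lcore Rx/Tx queues before launching the
 * per-mode processing loops.
 */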
/* Power library initialized in the main routine. 8< */
int
main(int argc, char **argv)
{
	struct lcore_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	int ret;
	uint16_t nb_ports;
	uint16_t queueid;
	unsigned lcore_id;
	uint64_t hz;
	uint32_t n_tx_queue, nb_lcores;
	uint32_t dev_rxq_num, dev_txq_num;
	uint8_t nb_rx_queue, queue, socketid;
	uint16_t portid;
	const char *ptr_strings[NUM_TELSTATS];

	/* catch SIGINT and restore cpufreq governor to ondemand */
	signal(SIGINT, signal_exit_now);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
	argc -= ret;
	argv += ret;

	/* init RTE timer library to be used later */
	rte_timer_subsystem_init();

	/* if we're running pmd-mgmt mode, don't default to baseline mode */
	baseline_enabled = false;

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");

	if (app_mode == APP_MODE_DEFAULT)
		app_mode = autodetect_mode();

	RTE_LOG(INFO, L3FWD_POWER, "Selected operation mode: %s\n",
			mode_to_str(app_mode));

	/* only legacy and empty poll mode rely on power library */
	if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) &&
			init_power_library())
		rte_exit(EXIT_FAILURE, "init_power_library failed\n");

	if (update_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "update_lcore_params failed\n");

	if (check_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");

	ret = init_lcore_rx_queues();
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");

	nb_ports = rte_eth_dev_count_avail();

	if (check_port_config() < 0)
		rte_exit(EXIT_FAILURE, "check_port_config failed\n");

	nb_lcores = rte_lcore_count();
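	/*
	 * Per-port initialization: Rx interrupts are only requested for the
	 * legacy and interrupt-only modes (and only when the port actually
	 * has Rx queues), the RSS hash mask is trimmed to what the device
	 * supports, the fast-free Tx offload is enabled where available, and
	 * one Tx queue is created per lcore up to the device's Tx queue
	 * limit.
	 */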
", 2611 nb_rx_queue, (unsigned)n_tx_queue ); 2612 /* If number of Rx queue is 0, no need to enable Rx interrupt */ 2613 if (nb_rx_queue == 0) 2614 need_intr = false; 2615 2616 if (need_intr) 2617 local_port_conf.intr_conf.rxq = 1; 2618 2619 ret = rte_eth_dev_info_get(portid, &dev_info); 2620 if (ret != 0) 2621 rte_exit(EXIT_FAILURE, 2622 "Error during getting device (port %u) info: %s\n", 2623 portid, strerror(-ret)); 2624 2625 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 2626 local_port_conf.txmode.offloads |= 2627 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 2628 2629 local_port_conf.rx_adv_conf.rss_conf.rss_hf &= 2630 dev_info.flow_type_rss_offloads; 2631 if (local_port_conf.rx_adv_conf.rss_conf.rss_hf != 2632 port_conf.rx_adv_conf.rss_conf.rss_hf) { 2633 printf("Port %u modified RSS hash function based on hardware support," 2634 "requested:%#"PRIx64" configured:%#"PRIx64"\n", 2635 portid, 2636 port_conf.rx_adv_conf.rss_conf.rss_hf, 2637 local_port_conf.rx_adv_conf.rss_conf.rss_hf); 2638 } 2639 2640 ret = rte_eth_dev_configure(portid, nb_rx_queue, 2641 (uint16_t)n_tx_queue, &local_port_conf); 2642 if (ret < 0) 2643 rte_exit(EXIT_FAILURE, "Cannot configure device: " 2644 "err=%d, port=%d\n", ret, portid); 2645 2646 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, 2647 &nb_txd); 2648 if (ret < 0) 2649 rte_exit(EXIT_FAILURE, 2650 "Cannot adjust number of descriptors: err=%d, port=%d\n", 2651 ret, portid); 2652 2653 ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 2654 if (ret < 0) 2655 rte_exit(EXIT_FAILURE, 2656 "Cannot get MAC address: err=%d, port=%d\n", 2657 ret, portid); 2658 2659 print_ethaddr(" Address:", &ports_eth_addr[portid]); 2660 printf(", "); 2661 2662 /* init memory */ 2663 ret = init_mem(NB_MBUF); 2664 if (ret < 0) 2665 rte_exit(EXIT_FAILURE, "init_mem failed\n"); 2666 2667 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2668 if (rte_lcore_is_enabled(lcore_id) == 0) 2669 continue; 2670 2671 /* Initialize TX buffers */ 2672 qconf = &lcore_conf[lcore_id]; 2673 qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", 2674 RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, 2675 rte_eth_dev_socket_id(portid)); 2676 if (qconf->tx_buffer[portid] == NULL) 2677 rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n", 2678 portid); 2679 2680 rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST); 2681 } 2682 2683 /* init one TX queue per couple (lcore,port) */ 2684 queueid = 0; 2685 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2686 if (rte_lcore_is_enabled(lcore_id) == 0) 2687 continue; 2688 2689 if (queueid >= dev_txq_num) 2690 continue; 2691 2692 if (numa_on) 2693 socketid = \ 2694 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2695 else 2696 socketid = 0; 2697 2698 printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); 2699 fflush(stdout); 2700 2701 txconf = &dev_info.default_txconf; 2702 txconf->offloads = local_port_conf.txmode.offloads; 2703 ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, 2704 socketid, txconf); 2705 if (ret < 0) 2706 rte_exit(EXIT_FAILURE, 2707 "rte_eth_tx_queue_setup: err=%d, " 2708 "port=%d\n", ret, portid); 2709 2710 qconf = &lcore_conf[lcore_id]; 2711 qconf->tx_queue_id[portid] = queueid; 2712 queueid++; 2713 2714 qconf->tx_port_id[qconf->n_tx_port] = portid; 2715 qconf->n_tx_port++; 2716 } 2717 printf("\n"); 2718 } 2719 2720 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2721 if (rte_lcore_is_enabled(lcore_id) == 0) 2722 continue; 2723 2724 if (app_mode == APP_MODE_LEGACY) { 2725 /* init 
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		if (app_mode == APP_MODE_LEGACY) {
			/* init timer structures for each enabled lcore */
			rte_timer_init(&power_timers[lcore_id]);
			hz = rte_get_timer_hz();
			rte_timer_reset(&power_timers[lcore_id],
					hz/TIMER_NUMBER_PER_SECOND,
					SINGLE, lcore_id,
					power_timer_cb, NULL);
		}
		qconf = &lcore_conf[lcore_id];
		printf("\nInitializing rx queues on lcore %u ... ", lcore_id);
		fflush(stdout);

		/* init RX queues */
		for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
			struct rte_eth_rxconf rxq_conf;

			portid = qconf->rx_queue_list[queue].port_id;
			queueid = qconf->rx_queue_list[queue].queue_id;

			if (numa_on)
				socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("rxq=%d,%d,%d ", portid, queueid, socketid);
			fflush(stdout);

			ret = rte_eth_dev_info_get(portid, &dev_info);
			if (ret != 0)
				rte_exit(EXIT_FAILURE,
					"Error during getting device (port %u) info: %s\n",
					portid, strerror(-ret));

			rxq_conf = dev_info.default_rxconf;
			rxq_conf.offloads = port_conf.rxmode.offloads;
			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
					socketid, &rxq_conf,
					pktmbuf_pool[socketid]);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_rx_queue_setup: err=%d, "
						"port=%d\n", ret, portid);

			if (parse_ptype) {
				if (add_cb_parse_ptype(portid, queueid) < 0)
					rte_exit(EXIT_FAILURE,
						"Failed to add ptype callback\n");
			}

			if (app_mode == APP_MODE_PMD_MGMT && !baseline_enabled) {
				ret = rte_power_ethdev_pmgmt_queue_enable(
						lcore_id, portid, queueid,
						pmgmt_type);
				if (ret < 0)
					rte_exit(EXIT_FAILURE,
						"rte_power_ethdev_pmgmt_queue_enable: err=%d, port=%d\n",
						ret, portid);
			}
		}
	}
	/* >8 End of power library initialization. */

	printf("\n");
	/* start ports */
	RTE_ETH_FOREACH_DEV(portid) {
		if ((enabled_port_mask & (1 << portid)) == 0) {
			continue;
		}
		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
						"port=%d\n", ret, portid);
		/*
		 * If enabled, put device in promiscuous mode.
		 * This allows IO forwarding mode to forward packets
		 * to itself through 2 cross-connected ports of the
		 * target machine.
		 */
		if (promiscuous_on) {
			ret = rte_eth_promiscuous_enable(portid);
			if (ret != 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_promiscuous_enable: err=%s, port=%u\n",
					rte_strerror(-ret), portid);
		}
		/* initialize spinlock for each port */
		rte_spinlock_init(&(locks[portid]));

		if (!parse_ptype)
			if (!check_ptype(portid))
				rte_exit(EXIT_FAILURE,
					"PMD cannot provide needed ptypes\n");
	}

	check_all_ports_link_status(enabled_port_mask);

	if (app_mode == APP_MODE_EMPTY_POLL) {

		if (empty_poll_train) {
			policy.state = TRAINING;
		} else {
			policy.state = MED_NORMAL;
			policy.med_base_edpi = ep_med_edpi;
			policy.hgh_base_edpi = ep_hgh_edpi;
		}

		ret = rte_power_empty_poll_stat_init(&ep_params,
				freq_tlb,
				&policy);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "empty poll init failed\n");
	}
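	/*
	 * Launch the per-mode processing loops. Legacy, interrupt-only and
	 * PMD-management modes run their loop on every lcore including the
	 * main one (CALL_MAIN); empty-poll and telemetry modes skip the main
	 * lcore (SKIP_MAIN) because it runs launch_timer() instead, driving
	 * the periodic empty-poll detection or telemetry update timers.
	 */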
Parameters: None"); 2871 rte_eal_mp_remote_launch(main_telemetry_loop, NULL, 2872 SKIP_MAIN); 2873 } else if (app_mode == APP_MODE_INTERRUPT) { 2874 rte_eal_mp_remote_launch(main_intr_loop, NULL, CALL_MAIN); 2875 } else if (app_mode == APP_MODE_PMD_MGMT) { 2876 /* reuse telemetry loop for PMD power management mode */ 2877 rte_eal_mp_remote_launch(main_telemetry_loop, NULL, CALL_MAIN); 2878 } 2879 2880 if (app_mode == APP_MODE_EMPTY_POLL || app_mode == APP_MODE_TELEMETRY) 2881 launch_timer(rte_lcore_id()); 2882 2883 RTE_LCORE_FOREACH_WORKER(lcore_id) { 2884 if (rte_eal_wait_lcore(lcore_id) < 0) 2885 return -1; 2886 } 2887 2888 if (app_mode == APP_MODE_PMD_MGMT) { 2889 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2890 if (rte_lcore_is_enabled(lcore_id) == 0) 2891 continue; 2892 qconf = &lcore_conf[lcore_id]; 2893 for (queue = 0; queue < qconf->n_rx_queue; ++queue) { 2894 portid = qconf->rx_queue_list[queue].port_id; 2895 queueid = qconf->rx_queue_list[queue].queue_id; 2896 2897 rte_power_ethdev_pmgmt_queue_disable(lcore_id, 2898 portid, queueid); 2899 } 2900 } 2901 } 2902 2903 RTE_ETH_FOREACH_DEV(portid) 2904 { 2905 if ((enabled_port_mask & (1 << portid)) == 0) 2906 continue; 2907 2908 ret = rte_eth_dev_stop(portid); 2909 if (ret != 0) 2910 RTE_LOG(ERR, L3FWD_POWER, "rte_eth_dev_stop: err=%d, port=%u\n", 2911 ret, portid); 2912 2913 rte_eth_dev_close(portid); 2914 } 2915 2916 if (app_mode == APP_MODE_EMPTY_POLL) 2917 rte_power_empty_poll_stat_free(); 2918 2919 if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) && 2920 deinit_power_library()) 2921 rte_exit(EXIT_FAILURE, "deinit_power_library failed\n"); 2922 2923 if (rte_eal_cleanup() < 0) 2924 RTE_LOG(ERR, L3FWD_POWER, "EAL cleanup failed\n"); 2925 2926 return 0; 2927 } 2928