1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdint.h> 8 #include <inttypes.h> 9 #include <sys/types.h> 10 #include <string.h> 11 #include <sys/queue.h> 12 #include <stdarg.h> 13 #include <errno.h> 14 #include <getopt.h> 15 #include <unistd.h> 16 #include <signal.h> 17 #include <math.h> 18 19 #include <rte_common.h> 20 #include <rte_byteorder.h> 21 #include <rte_log.h> 22 #include <rte_malloc.h> 23 #include <rte_memory.h> 24 #include <rte_memcpy.h> 25 #include <rte_eal.h> 26 #include <rte_launch.h> 27 #include <rte_atomic.h> 28 #include <rte_cycles.h> 29 #include <rte_prefetch.h> 30 #include <rte_lcore.h> 31 #include <rte_per_lcore.h> 32 #include <rte_branch_prediction.h> 33 #include <rte_interrupts.h> 34 #include <rte_random.h> 35 #include <rte_debug.h> 36 #include <rte_ether.h> 37 #include <rte_ethdev.h> 38 #include <rte_mempool.h> 39 #include <rte_mbuf.h> 40 #include <rte_ip.h> 41 #include <rte_tcp.h> 42 #include <rte_udp.h> 43 #include <rte_string_fns.h> 44 #include <rte_timer.h> 45 #include <rte_power.h> 46 #include <rte_spinlock.h> 47 #include <rte_power_empty_poll.h> 48 #include <rte_metrics.h> 49 #include <rte_telemetry.h> 50 #include <rte_power_pmd_mgmt.h> 51 52 #include "perf_core.h" 53 #include "main.h" 54 55 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 56 57 #define MAX_PKT_BURST 32 58 59 #define MIN_ZERO_POLL_COUNT 10 60 61 /* 100 ms interval */ 62 #define TIMER_NUMBER_PER_SECOND 10 63 /* (10ms) */ 64 #define INTERVALS_PER_SECOND 100 65 /* 100000 us */ 66 #define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND) 67 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25 68 69 #define APP_LOOKUP_EXACT_MATCH 0 70 #define APP_LOOKUP_LPM 1 71 #define DO_RFC_1812_CHECKS 72 73 #ifndef APP_LOOKUP_METHOD 74 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 75 #endif 76 77 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 78 #include <rte_hash.h> 79 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 80 #include <rte_lpm.h> 81 #else 82 #error "APP_LOOKUP_METHOD set to incorrect value" 83 #endif 84 85 #ifndef IPv6_BYTES 86 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ 87 "%02x%02x:%02x%02x:%02x%02x:%02x%02x" 88 #define IPv6_BYTES(addr) \ 89 addr[0], addr[1], addr[2], addr[3], \ 90 addr[4], addr[5], addr[6], addr[7], \ 91 addr[8], addr[9], addr[10], addr[11],\ 92 addr[12], addr[13],addr[14], addr[15] 93 #endif 94 95 #define MAX_JUMBO_PKT_LEN 9600 96 97 #define IPV6_ADDR_LEN 16 98 99 #define MEMPOOL_CACHE_SIZE 256 100 101 /* 102 * This expression is used to calculate the number of mbufs needed depending on 103 * user input, taking into account memory for rx and tx hardware rings, cache 104 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 105 * NB_MBUF never goes below a minimum value of 8192. 106 */ 107 108 #define NB_MBUF RTE_MAX ( \ 109 (nb_ports*nb_rx_queue*nb_rxd + \ 110 nb_ports*nb_lcores*MAX_PKT_BURST + \ 111 nb_ports*n_tx_queue*nb_txd + \ 112 nb_lcores*MEMPOOL_CACHE_SIZE), \ 113 (unsigned)8192) 114 115 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 116 117 #define NB_SOCKETS 8 118 119 /* Configure how many packets ahead to prefetch, when reading packets */ 120 #define PREFETCH_OFFSET 3 121 122 /* 123 * Configurable number of RX/TX ring descriptors 124 */ 125 #define RTE_TEST_RX_DESC_DEFAULT 1024 126 #define RTE_TEST_TX_DESC_DEFAULT 1024 127 128 /* 129 * These two thresholds were decided on by running the training algorithm on 130 * a 2.5GHz Xeon. 
These defaults can be overridden by supplying non-zero values 131 * for the med_threshold and high_threshold parameters on the command line. 132 */ 133 #define EMPTY_POLL_MED_THRESHOLD 350000UL 134 #define EMPTY_POLL_HGH_THRESHOLD 580000UL 135 136 #define NUM_TELSTATS RTE_DIM(telstats_strings) 137 138 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 139 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 140 141 /* ethernet addresses of ports */ 142 static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 143 144 /* ethernet addresses of ports */ 145 static rte_spinlock_t locks[RTE_MAX_ETHPORTS]; 146 147 /* mask of enabled ports */ 148 static uint32_t enabled_port_mask = 0; 149 /* Ports set in promiscuous mode off by default. */ 150 static int promiscuous_on = 0; 151 /* NUMA is enabled by default. */ 152 static int numa_on = 1; 153 static bool empty_poll_stop; 154 static bool empty_poll_train; 155 volatile bool quit_signal; 156 static struct ep_params *ep_params; 157 static struct ep_policy policy; 158 static long ep_med_edpi, ep_hgh_edpi; 159 /* timer to update telemetry every 500ms */ 160 static struct rte_timer telemetry_timer; 161 162 /* stats index returned by metrics lib */ 163 int telstats_index; 164 165 struct telstats_name { 166 char name[RTE_ETH_XSTATS_NAME_SIZE]; 167 }; 168 169 /* telemetry stats to be reported */ 170 const struct telstats_name telstats_strings[] = { 171 {"empty_poll"}, 172 {"full_poll"}, 173 {"busy_percent"} 174 }; 175 176 /* core busyness in percentage */ 177 enum busy_rate { 178 ZERO = 0, 179 PARTIAL = 50, 180 FULL = 100 181 }; 182 183 /* reference poll count to measure core busyness */ 184 #define DEFAULT_COUNT 10000 185 /* 186 * reference CYCLES to be used to 187 * measure core busyness based on poll count 188 */ 189 #define MIN_CYCLES 1500000ULL 190 #define MAX_CYCLES 22000000ULL 191 192 /* (500ms) */ 193 #define TELEMETRY_INTERVALS_PER_SEC 2 194 195 static int parse_ptype; /**< Parse packet type using rx callback, and */ 196 /**< disabled by default */ 197 198 enum appmode { 199 APP_MODE_DEFAULT = 0, 200 APP_MODE_LEGACY, 201 APP_MODE_EMPTY_POLL, 202 APP_MODE_TELEMETRY, 203 APP_MODE_INTERRUPT, 204 APP_MODE_PMD_MGMT 205 }; 206 207 enum appmode app_mode; 208 209 static enum rte_power_pmd_mgmt_type pmgmt_type; 210 211 enum freq_scale_hint_t 212 { 213 FREQ_LOWER = -1, 214 FREQ_CURRENT = 0, 215 FREQ_HIGHER = 1, 216 FREQ_HIGHEST = 2 217 }; 218 219 struct lcore_rx_queue { 220 uint16_t port_id; 221 uint8_t queue_id; 222 enum freq_scale_hint_t freq_up_hint; 223 uint32_t zero_rx_packet_count; 224 uint32_t idle_hint; 225 } __rte_cache_aligned; 226 227 #define MAX_RX_QUEUE_PER_LCORE 16 228 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 229 #define MAX_RX_QUEUE_PER_PORT 128 230 231 #define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 232 233 234 struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; 235 static struct lcore_params lcore_params_array_default[] = { 236 {0, 0, 2}, 237 {0, 1, 2}, 238 {0, 2, 2}, 239 {1, 0, 2}, 240 {1, 1, 2}, 241 {1, 2, 2}, 242 {2, 0, 2}, 243 {3, 0, 3}, 244 {3, 1, 3}, 245 }; 246 247 struct lcore_params *lcore_params = lcore_params_array_default; 248 uint16_t nb_lcore_params = RTE_DIM(lcore_params_array_default); 249 250 static struct rte_eth_conf port_conf = { 251 .rxmode = { 252 .mq_mode = ETH_MQ_RX_RSS, 253 .max_rx_pkt_len = RTE_ETHER_MAX_LEN, 254 .split_hdr_size = 0, 255 .offloads = DEV_RX_OFFLOAD_CHECKSUM, 256 }, 257 .rx_adv_conf = { 258 .rss_conf = { 259 .rss_key = NULL, 260 .rss_hf = ETH_RSS_UDP, 261 }, 262 }, 263 .txmode = { 264 
.mq_mode = ETH_MQ_TX_NONE, 265 } 266 }; 267 268 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; 269 270 271 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 272 273 #ifdef RTE_ARCH_X86 274 #include <rte_hash_crc.h> 275 #define DEFAULT_HASH_FUNC rte_hash_crc 276 #else 277 #include <rte_jhash.h> 278 #define DEFAULT_HASH_FUNC rte_jhash 279 #endif 280 281 struct ipv4_5tuple { 282 uint32_t ip_dst; 283 uint32_t ip_src; 284 uint16_t port_dst; 285 uint16_t port_src; 286 uint8_t proto; 287 } __rte_packed; 288 289 struct ipv6_5tuple { 290 uint8_t ip_dst[IPV6_ADDR_LEN]; 291 uint8_t ip_src[IPV6_ADDR_LEN]; 292 uint16_t port_dst; 293 uint16_t port_src; 294 uint8_t proto; 295 } __rte_packed; 296 297 struct ipv4_l3fwd_route { 298 struct ipv4_5tuple key; 299 uint8_t if_out; 300 }; 301 302 struct ipv6_l3fwd_route { 303 struct ipv6_5tuple key; 304 uint8_t if_out; 305 }; 306 307 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 308 {{RTE_IPV4(100,10,0,1), RTE_IPV4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, 309 {{RTE_IPV4(100,20,0,2), RTE_IPV4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, 310 {{RTE_IPV4(100,30,0,3), RTE_IPV4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, 311 {{RTE_IPV4(100,40,0,4), RTE_IPV4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, 312 }; 313 314 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 315 { 316 { 317 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 318 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, 319 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 320 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, 321 1, 10, IPPROTO_UDP 322 }, 4 323 }, 324 }; 325 326 typedef struct rte_hash lookup_struct_t; 327 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 328 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 329 330 #define L3FWD_HASH_ENTRIES 1024 331 332 static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 333 static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 334 #endif 335 336 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 337 struct ipv4_l3fwd_route { 338 uint32_t ip; 339 uint8_t depth; 340 uint8_t if_out; 341 }; 342 343 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 344 {RTE_IPV4(1,1,1,0), 24, 0}, 345 {RTE_IPV4(2,1,1,0), 24, 1}, 346 {RTE_IPV4(3,1,1,0), 24, 2}, 347 {RTE_IPV4(4,1,1,0), 24, 3}, 348 {RTE_IPV4(5,1,1,0), 24, 4}, 349 {RTE_IPV4(6,1,1,0), 24, 5}, 350 {RTE_IPV4(7,1,1,0), 24, 6}, 351 {RTE_IPV4(8,1,1,0), 24, 7}, 352 }; 353 354 #define IPV4_L3FWD_LPM_MAX_RULES 1024 355 356 typedef struct rte_lpm lookup_struct_t; 357 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 358 #endif 359 360 struct lcore_conf { 361 uint16_t n_rx_queue; 362 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 363 uint16_t n_tx_port; 364 uint16_t tx_port_id[RTE_MAX_ETHPORTS]; 365 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 366 struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; 367 lookup_struct_t * ipv4_lookup_struct; 368 lookup_struct_t * ipv6_lookup_struct; 369 } __rte_cache_aligned; 370 371 struct lcore_stats { 372 /* total sleep time in ms since last frequency scaling down */ 373 uint32_t sleep_time; 374 /* number of long sleep recently */ 375 uint32_t nb_long_sleep; 376 /* freq. scaling up trend */ 377 uint32_t trend; 378 /* total packet processed recently */ 379 uint64_t nb_rx_processed; 380 /* total iterations looped recently */ 381 uint64_t nb_iteration_looped; 382 /* 383 * Represents empty and non empty polls 384 * of rte_eth_rx_burst(); 385 * ep_nep[0] holds non empty polls 386 * i.e. 
0 < nb_rx <= MAX_BURST
	 * ep_nep[1] holds empty polls.
	 * i.e. nb_rx == 0
	 */
	uint64_t ep_nep[2];
	/*
	 * Represents full and empty+partial
	 * polls of rte_eth_rx_burst();
	 * fp_nfp[0] holds empty+partial polls.
	 * i.e. 0 <= nb_rx < MAX_BURST
	 * fp_nfp[1] holds full polls.
	 * i.e. nb_rx == MAX_BURST
	 */
	uint64_t fp_nfp[2];
	enum busy_rate br;
	rte_spinlock_t telemetry_lock;
} __rte_cache_aligned;

static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned;
static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned;
static struct rte_timer power_timers[RTE_MAX_LCORE];

static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
static inline enum freq_scale_hint_t power_freq_scaleup_heuristic(
		unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);

/*
 * These defaults use the max frequency index (1), a medium index (9)
 * and a typical low frequency index (14). They can be adjusted to use
 * different indexes using the relevant command line parameters.
 */
static uint8_t freq_tlb[] = {14, 9, 1};

static int is_done(void)
{
	return quit_signal;
}

/* exit signal handler */
static void
signal_exit_now(int sigtype)
{
	if (sigtype == SIGINT)
		quit_signal = true;
}

/* Frequency scale down timer callback */
static void
power_timer_cb(__rte_unused struct rte_timer *tim,
		__rte_unused void *arg)
{
	uint64_t hz;
	float sleep_time_ratio;
	unsigned lcore_id = rte_lcore_id();

	/* share of the last scaling period that this lcore spent sleeping */
	sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
			(float)SCALING_PERIOD;
	/*
	 * Scale the frequency down by one step if the lcore slept for at
	 * least SCALING_DOWN_TIME_RATIO_THRESHOLD of the period (with the
	 * defaults: 25 ms or more out of every 100 ms).
	 */
	if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
		if (rte_power_freq_down)
			rte_power_freq_down(lcore_id);
	} else if ((unsigned)(stats[lcore_id].nb_rx_processed /
			stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
		/*
		 * Also scale down by one step if the average number of
		 * packets per iteration is lower than a full burst.
		 */
		if (rte_power_freq_down)
			rte_power_freq_down(lcore_id);
	}

	/*
	 * Re-arm the timer based on the current frequency so that the timer
	 * interval stays relatively fixed.
	 */
	hz = rte_get_timer_hz();
	rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND,
				SINGLE, lcore_id, power_timer_cb, NULL);

	stats[lcore_id].nb_rx_processed = 0;
	stats[lcore_id].nb_iteration_looped = 0;

	stats[lcore_id].sleep_time = 0;
}

/* Enqueue a single packet, and send burst if queue is filled */
static inline int
send_single_packet(struct rte_mbuf *m, uint16_t port)
{
	uint32_t lcore_id;
	struct lcore_conf *qconf;

	lcore_id = rte_lcore_id();
	qconf = &lcore_conf[lcore_id];

	rte_eth_tx_buffer(port, qconf->tx_queue_id[port],
			qconf->tx_buffer[port], m);

	return 0;
}
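/*
 * Note on TX batching: send_single_packet() only appends the mbuf to the
 * per-port rte_eth_dev_tx_buffer. The buffered packets are actually put on
 * the wire either when the buffer fills up, or when the main loops flush it
 * with rte_eth_tx_buffer_flush() after roughly BURST_TX_DRAIN_US (~100 us)
 * without a flush (see the TX drain logic in the processing loops below).
 */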
#ifdef DO_RFC_1812_CHECKS
static inline int
is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len)
{
	/* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
	/*
	 * 1. The packet length reported by the Link Layer must be large
	 * enough to hold the minimum length legal IP datagram (20 bytes).
	 */
	if (link_len < sizeof(struct rte_ipv4_hdr))
		return -1;

	/* 2. The IP checksum must be correct. */
	/* this is checked in H/W */

	/*
	 * 3. The IP version number must be 4. If the version number is not 4
	 * then the packet may be another version of IP, such as IPng or
	 * ST-II.
	 */
	if (((pkt->version_ihl) >> 4) != 4)
		return -3;
	/*
	 * 4. The IP header length field must be large enough to hold the
	 * minimum length legal IP datagram (20 bytes = 5 words).
	 */
	if ((pkt->version_ihl & 0xf) < 5)
		return -4;

	/*
	 * 5. The IP total length field must be large enough to hold the IP
	 * datagram header, whose length is specified in the IP header length
	 * field.
	 */
	if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr))
		return -5;

	return 0;
}
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
static void
print_ipv4_key(struct ipv4_5tuple key)
{
	printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, "
		"proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src,
				key.port_dst, key.port_src, key.proto);
}
static void
print_ipv6_key(struct ipv6_5tuple key)
{
	printf("IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", "
		"port dst = %d, port src = %d, proto = %d\n",
		IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src),
		key.port_dst, key.port_src, key.proto);
}

static inline uint16_t
get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid,
		lookup_struct_t *ipv4_l3fwd_lookup_struct)
{
	struct ipv4_5tuple key;
	struct rte_tcp_hdr *tcp;
	struct rte_udp_hdr *udp;
	int ret = 0;

	key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
	key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr);
	key.proto = ipv4_hdr->next_proto_id;

	switch (ipv4_hdr->next_proto_id) {
	case IPPROTO_TCP:
		tcp = (struct rte_tcp_hdr *)((unsigned char *)ipv4_hdr +
					sizeof(struct rte_ipv4_hdr));
		key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
		key.port_src = rte_be_to_cpu_16(tcp->src_port);
		break;

	case IPPROTO_UDP:
		udp = (struct rte_udp_hdr *)((unsigned char *)ipv4_hdr +
					sizeof(struct rte_ipv4_hdr));
		key.port_dst = rte_be_to_cpu_16(udp->dst_port);
		key.port_src = rte_be_to_cpu_16(udp->src_port);
		break;

	default:
		key.port_dst = 0;
		key.port_src = 0;
		break;
	}

	/* Find destination port */
	ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
	return ((ret < 0) ?
portid : ipv4_l3fwd_out_if[ret]); 589 } 590 591 static inline uint16_t 592 get_ipv6_dst_port(struct rte_ipv6_hdr *ipv6_hdr, uint16_t portid, 593 lookup_struct_t *ipv6_l3fwd_lookup_struct) 594 { 595 struct ipv6_5tuple key; 596 struct rte_tcp_hdr *tcp; 597 struct rte_udp_hdr *udp; 598 int ret = 0; 599 600 memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); 601 memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); 602 603 key.proto = ipv6_hdr->proto; 604 605 switch (ipv6_hdr->proto) { 606 case IPPROTO_TCP: 607 tcp = (struct rte_tcp_hdr *)((unsigned char *) ipv6_hdr + 608 sizeof(struct rte_ipv6_hdr)); 609 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 610 key.port_src = rte_be_to_cpu_16(tcp->src_port); 611 break; 612 613 case IPPROTO_UDP: 614 udp = (struct rte_udp_hdr *)((unsigned char *) ipv6_hdr + 615 sizeof(struct rte_ipv6_hdr)); 616 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 617 key.port_src = rte_be_to_cpu_16(udp->src_port); 618 break; 619 620 default: 621 key.port_dst = 0; 622 key.port_src = 0; 623 break; 624 } 625 626 /* Find destination port */ 627 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 628 return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); 629 } 630 #endif 631 632 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 633 static inline uint16_t 634 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 635 lookup_struct_t *ipv4_l3fwd_lookup_struct) 636 { 637 uint32_t next_hop; 638 639 return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, 640 rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? 641 next_hop : portid); 642 } 643 #endif 644 645 static inline void 646 parse_ptype_one(struct rte_mbuf *m) 647 { 648 struct rte_ether_hdr *eth_hdr; 649 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 650 uint16_t ether_type; 651 652 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 653 ether_type = eth_hdr->ether_type; 654 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) 655 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 656 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) 657 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 658 659 m->packet_type = packet_type; 660 } 661 662 static uint16_t 663 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused, 664 struct rte_mbuf *pkts[], uint16_t nb_pkts, 665 uint16_t max_pkts __rte_unused, 666 void *user_param __rte_unused) 667 { 668 unsigned int i; 669 670 for (i = 0; i < nb_pkts; ++i) 671 parse_ptype_one(pkts[i]); 672 673 return nb_pkts; 674 } 675 676 static int 677 add_cb_parse_ptype(uint16_t portid, uint16_t queueid) 678 { 679 printf("Port %d: softly parse packet type info\n", portid); 680 if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL)) 681 return 0; 682 683 printf("Failed to add rx callback: port=%d\n", portid); 684 return -1; 685 } 686 687 static inline void 688 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid, 689 struct lcore_conf *qconf) 690 { 691 struct rte_ether_hdr *eth_hdr; 692 struct rte_ipv4_hdr *ipv4_hdr; 693 void *d_addr_bytes; 694 uint16_t dst_port; 695 696 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 697 698 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 699 /* Handle IPv4 headers.*/ 700 ipv4_hdr = 701 rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 702 sizeof(struct rte_ether_hdr)); 703 704 #ifdef DO_RFC_1812_CHECKS 705 /* Check to make sure the packet is valid (RFC1812) */ 706 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { 707 rte_pktmbuf_free(m); 708 return; 709 } 710 #endif 711 712 dst_port = get_ipv4_dst_port(ipv4_hdr, 
portid,
					qconf->ipv4_lookup_struct);
		if (dst_port >= RTE_MAX_ETHPORTS ||
				(enabled_port_mask & 1 << dst_port) == 0)
			dst_port = portid;

		/* 02:00:00:00:00:xx */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) =
			0x000000000002 + ((uint64_t)dst_port << 40);

#ifdef DO_RFC_1812_CHECKS
		/* Update time to live and header checksum */
		--(ipv4_hdr->time_to_live);
		++(ipv4_hdr->hdr_checksum);
#endif

		/* src addr */
		rte_ether_addr_copy(&ports_eth_addr[dst_port],
				&eth_hdr->s_addr);

		send_single_packet(m, dst_port);
	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
		/* Handle IPv6 headers.*/
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		struct rte_ipv6_hdr *ipv6_hdr;

		ipv6_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
						sizeof(struct rte_ether_hdr));

		dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
					qconf->ipv6_lookup_struct);

		if (dst_port >= RTE_MAX_ETHPORTS ||
				(enabled_port_mask & 1 << dst_port) == 0)
			dst_port = portid;

		/* 02:00:00:00:00:xx */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) =
			0x000000000002 + ((uint64_t)dst_port << 40);

		/* src addr */
		rte_ether_addr_copy(&ports_eth_addr[dst_port],
				&eth_hdr->s_addr);

		send_single_packet(m, dst_port);
#else
		/* We don't currently handle IPv6 packets in LPM mode. */
		rte_pktmbuf_free(m);
#endif
	} else
		rte_pktmbuf_free(m);
}

#define MINIMUM_SLEEP_TIME         1
#define SUSPEND_THRESHOLD          300

static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
	/*
	 * If the queue has been polled empty fewer than SUSPEND_THRESHOLD
	 * times in a row, suggest only the minimal (1 us) sleep.
	 */
	if (zero_rx_packet_count < SUSPEND_THRESHOLD)
		return MINIMUM_SLEEP_TIME;
	/*
	 * Otherwise suggest a longer sleep of SUSPEND_THRESHOLD us, which
	 * also covers the latency of switching from C3/C6 back to C0.
	 */
	else
		return SUSPEND_THRESHOLD;
}

static inline enum freq_scale_hint_t
power_freq_scaleup_heuristic(unsigned lcore_id,
			     uint16_t port_id,
			     uint16_t queue_id)
{
	uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id);
	/*
	 * HW Rx queue size is 128 by default, and an Rx burst reads at most
	 * 32 entries per iteration.
	 */
#define FREQ_GEAR1_RX_PACKET_THRESHOLD             MAX_PKT_BURST
#define FREQ_GEAR2_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*2)
#define FREQ_GEAR3_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*3)
#define FREQ_UP_TREND1_ACC   1
#define FREQ_UP_TREND2_ACC   100
#define FREQ_UP_THRESHOLD    10000

	if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHEST;
	} else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD))
		stats[lcore_id].trend += FREQ_UP_TREND2_ACC;
	else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD))
		stats[lcore_id].trend += FREQ_UP_TREND1_ACC;

	if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHER;
	}

	return FREQ_CURRENT;
}
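/*
 * Worked example of the heuristic above, using the macro values defined in
 * this file: a single poll that finds more than FREQ_GEAR3_RX_PACKET_THRESHOLD
 * (96) packets queued requests FREQ_HIGHEST immediately and resets the trend.
 * Polls finding 65-96 packets add FREQ_UP_TREND2_ACC (100) to the trend
 * counter, so roughly 100 such polls in a row are needed to exceed
 * FREQ_UP_THRESHOLD (10000) and request FREQ_HIGHER; polls finding 33-64
 * packets add only 1 each.
 */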
/**
 * Force the polling thread to sleep until a one-shot rx interrupt triggers.
 * @param num
 *  Number of Rx queues (epoll events) to wait on.
 * @param lcore
 *  Lcore id.
 * @return
 *  0 on success
 */
static int
sleep_until_rx_interrupt(int num, int lcore)
{
	/*
	 * we want to track when we are woken up by traffic so that we can go
	 * back to sleep again without log spamming. Avoid cache line sharing
	 * to prevent threads stepping on each others' toes.
	 */
	static struct {
		bool wakeup;
	} __rte_cache_aligned status[RTE_MAX_LCORE];
	struct rte_epoll_event event[num];
	int n, i;
	uint16_t port_id;
	uint8_t queue_id;
	void *data;

	if (status[lcore].wakeup) {
		RTE_LOG(INFO, L3FWD_POWER,
				"lcore %u sleeps until interrupt triggers\n",
				rte_lcore_id());
	}

	n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, 10);
	for (i = 0; i < n; i++) {
		data = event[i].epdata.data;
		port_id = ((uintptr_t)data) >> CHAR_BIT;
		queue_id = ((uintptr_t)data) &
			RTE_LEN2MASK(CHAR_BIT, uint8_t);
		RTE_LOG(INFO, L3FWD_POWER,
			"lcore %u is woken up from rx interrupt on"
			" port %d queue %d\n",
			rte_lcore_id(), port_id, queue_id);
	}
	status[lcore].wakeup = n != 0;

	return 0;
}

static void turn_on_off_intr(struct lcore_conf *qconf, bool on)
{
	int i;
	struct lcore_rx_queue *rx_queue;
	uint8_t queue_id;
	uint16_t port_id;

	for (i = 0; i < qconf->n_rx_queue; ++i) {
		rx_queue = &(qconf->rx_queue_list[i]);
		port_id = rx_queue->port_id;
		queue_id = rx_queue->queue_id;

		rte_spinlock_lock(&(locks[port_id]));
		if (on)
			rte_eth_dev_rx_intr_enable(port_id, queue_id);
		else
			rte_eth_dev_rx_intr_disable(port_id, queue_id);
		rte_spinlock_unlock(&(locks[port_id]));
	}
}

static int event_register(struct lcore_conf *qconf)
{
	struct lcore_rx_queue *rx_queue;
	uint8_t queueid;
	uint16_t portid;
	uint32_t data;
	int ret;
	int i;

	for (i = 0; i < qconf->n_rx_queue; ++i) {
		rx_queue = &(qconf->rx_queue_list[i]);
		portid = rx_queue->port_id;
		queueid = rx_queue->queue_id;
		data = portid << CHAR_BIT | queueid;

		ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
						RTE_EPOLL_PER_THREAD,
						RTE_INTR_EVENT_ADD,
						(void *)((uintptr_t)data));
		if (ret)
			return ret;
	}

	return 0;
}

/* main processing loop */
static int main_intr_loop(__rte_unused void *dummy)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	unsigned int lcore_id;
	uint64_t prev_tsc, diff_tsc, cur_tsc;
	int i, j, nb_rx;
	uint8_t queueid;
	uint16_t portid;
	struct lcore_conf *qconf;
	struct lcore_rx_queue *rx_queue;
	uint32_t lcore_rx_idle_count = 0;
	uint32_t lcore_idle_hint = 0;
	int intr_en = 0;

	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
				   US_PER_S * BURST_TX_DRAIN_US;

	prev_tsc = 0;

	lcore_id = rte_lcore_id();
	qconf = &lcore_conf[lcore_id];

	if (qconf->n_rx_queue == 0) {
		RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n",
				lcore_id);
		return 0;
	}

	RTE_LOG(INFO, L3FWD_POWER, "entering main interrupt loop on lcore %u\n",
			lcore_id);

	for (i = 0; i < qconf->n_rx_queue; i++) {
		portid = qconf->rx_queue_list[i].port_id;
		queueid = qconf->rx_queue_list[i].queue_id;
		RTE_LOG(INFO, L3FWD_POWER,
				" -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
				lcore_id, portid, queueid);
	}

	/* add into event wait list */
	if (event_register(qconf) == 0)
		intr_en = 1;
	else
		RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't be enabled.\n");

	while (!is_done()) {
		stats[lcore_id].nb_iteration_looped++;

		cur_tsc = rte_rdtsc();

		/*
		 * TX burst queue drain
		 */
		diff_tsc = cur_tsc - prev_tsc;
		if (unlikely(diff_tsc > drain_tsc)) {
for (i = 0; i < qconf->n_tx_port; ++i) { 970 portid = qconf->tx_port_id[i]; 971 rte_eth_tx_buffer_flush(portid, 972 qconf->tx_queue_id[portid], 973 qconf->tx_buffer[portid]); 974 } 975 prev_tsc = cur_tsc; 976 } 977 978 start_rx: 979 /* 980 * Read packet from RX queues 981 */ 982 lcore_rx_idle_count = 0; 983 for (i = 0; i < qconf->n_rx_queue; ++i) { 984 rx_queue = &(qconf->rx_queue_list[i]); 985 rx_queue->idle_hint = 0; 986 portid = rx_queue->port_id; 987 queueid = rx_queue->queue_id; 988 989 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 990 MAX_PKT_BURST); 991 992 stats[lcore_id].nb_rx_processed += nb_rx; 993 if (unlikely(nb_rx == 0)) { 994 /** 995 * no packet received from rx queue, try to 996 * sleep for a while forcing CPU enter deeper 997 * C states. 998 */ 999 rx_queue->zero_rx_packet_count++; 1000 1001 if (rx_queue->zero_rx_packet_count <= 1002 MIN_ZERO_POLL_COUNT) 1003 continue; 1004 1005 rx_queue->idle_hint = power_idle_heuristic( 1006 rx_queue->zero_rx_packet_count); 1007 lcore_rx_idle_count++; 1008 } else { 1009 rx_queue->zero_rx_packet_count = 0; 1010 } 1011 1012 /* Prefetch first packets */ 1013 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1014 rte_prefetch0(rte_pktmbuf_mtod( 1015 pkts_burst[j], void *)); 1016 } 1017 1018 /* Prefetch and forward already prefetched packets */ 1019 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1020 rte_prefetch0(rte_pktmbuf_mtod( 1021 pkts_burst[j + PREFETCH_OFFSET], 1022 void *)); 1023 l3fwd_simple_forward( 1024 pkts_burst[j], portid, qconf); 1025 } 1026 1027 /* Forward remaining prefetched packets */ 1028 for (; j < nb_rx; j++) { 1029 l3fwd_simple_forward( 1030 pkts_burst[j], portid, qconf); 1031 } 1032 } 1033 1034 if (unlikely(lcore_rx_idle_count == qconf->n_rx_queue)) { 1035 /** 1036 * All Rx queues empty in recent consecutive polls, 1037 * sleep in a conservative manner, meaning sleep as 1038 * less as possible. 1039 */ 1040 for (i = 1, 1041 lcore_idle_hint = qconf->rx_queue_list[0].idle_hint; 1042 i < qconf->n_rx_queue; ++i) { 1043 rx_queue = &(qconf->rx_queue_list[i]); 1044 if (rx_queue->idle_hint < lcore_idle_hint) 1045 lcore_idle_hint = rx_queue->idle_hint; 1046 } 1047 1048 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1049 /** 1050 * execute "pause" instruction to avoid context 1051 * switch which generally take hundred of 1052 * microseconds for short sleep. 
1053 */ 1054 rte_delay_us(lcore_idle_hint); 1055 else { 1056 /* suspend until rx interrupt triggers */ 1057 if (intr_en) { 1058 turn_on_off_intr(qconf, 1); 1059 sleep_until_rx_interrupt( 1060 qconf->n_rx_queue, 1061 lcore_id); 1062 turn_on_off_intr(qconf, 0); 1063 /** 1064 * start receiving packets immediately 1065 */ 1066 if (likely(!is_done())) 1067 goto start_rx; 1068 } 1069 } 1070 stats[lcore_id].sleep_time += lcore_idle_hint; 1071 } 1072 } 1073 1074 return 0; 1075 } 1076 1077 /* main processing loop */ 1078 static int 1079 main_telemetry_loop(__rte_unused void *dummy) 1080 { 1081 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1082 unsigned int lcore_id; 1083 uint64_t prev_tsc, diff_tsc, cur_tsc, prev_tel_tsc; 1084 int i, j, nb_rx; 1085 uint8_t queueid; 1086 uint16_t portid; 1087 struct lcore_conf *qconf; 1088 struct lcore_rx_queue *rx_queue; 1089 uint64_t ep_nep[2] = {0}, fp_nfp[2] = {0}; 1090 uint64_t poll_count; 1091 enum busy_rate br; 1092 1093 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 1094 US_PER_S * BURST_TX_DRAIN_US; 1095 1096 poll_count = 0; 1097 prev_tsc = 0; 1098 prev_tel_tsc = 0; 1099 1100 lcore_id = rte_lcore_id(); 1101 qconf = &lcore_conf[lcore_id]; 1102 1103 if (qconf->n_rx_queue == 0) { 1104 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1105 lcore_id); 1106 return 0; 1107 } 1108 1109 RTE_LOG(INFO, L3FWD_POWER, "entering main telemetry loop on lcore %u\n", 1110 lcore_id); 1111 1112 for (i = 0; i < qconf->n_rx_queue; i++) { 1113 portid = qconf->rx_queue_list[i].port_id; 1114 queueid = qconf->rx_queue_list[i].queue_id; 1115 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1116 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1117 } 1118 1119 while (!is_done()) { 1120 1121 cur_tsc = rte_rdtsc(); 1122 /* 1123 * TX burst queue drain 1124 */ 1125 diff_tsc = cur_tsc - prev_tsc; 1126 if (unlikely(diff_tsc > drain_tsc)) { 1127 for (i = 0; i < qconf->n_tx_port; ++i) { 1128 portid = qconf->tx_port_id[i]; 1129 rte_eth_tx_buffer_flush(portid, 1130 qconf->tx_queue_id[portid], 1131 qconf->tx_buffer[portid]); 1132 } 1133 prev_tsc = cur_tsc; 1134 } 1135 1136 /* 1137 * Read packet from RX queues 1138 */ 1139 for (i = 0; i < qconf->n_rx_queue; ++i) { 1140 rx_queue = &(qconf->rx_queue_list[i]); 1141 portid = rx_queue->port_id; 1142 queueid = rx_queue->queue_id; 1143 1144 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1145 MAX_PKT_BURST); 1146 ep_nep[nb_rx == 0]++; 1147 fp_nfp[nb_rx == MAX_PKT_BURST]++; 1148 poll_count++; 1149 if (unlikely(nb_rx == 0)) 1150 continue; 1151 1152 /* Prefetch first packets */ 1153 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1154 rte_prefetch0(rte_pktmbuf_mtod( 1155 pkts_burst[j], void *)); 1156 } 1157 1158 /* Prefetch and forward already prefetched packets */ 1159 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1160 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1161 j + PREFETCH_OFFSET], void *)); 1162 l3fwd_simple_forward(pkts_burst[j], portid, 1163 qconf); 1164 } 1165 1166 /* Forward remaining prefetched packets */ 1167 for (; j < nb_rx; j++) { 1168 l3fwd_simple_forward(pkts_burst[j], portid, 1169 qconf); 1170 } 1171 } 1172 if (unlikely(poll_count >= DEFAULT_COUNT)) { 1173 diff_tsc = cur_tsc - prev_tel_tsc; 1174 if (diff_tsc >= MAX_CYCLES) { 1175 br = FULL; 1176 } else if (diff_tsc > MIN_CYCLES && 1177 diff_tsc < MAX_CYCLES) { 1178 br = (diff_tsc * 100) / MAX_CYCLES; 1179 } else { 1180 br = ZERO; 1181 } 1182 poll_count = 0; 1183 prev_tel_tsc = cur_tsc; 1184 /* update stats for telemetry */ 1185 
rte_spinlock_lock(&stats[lcore_id].telemetry_lock); 1186 stats[lcore_id].ep_nep[0] = ep_nep[0]; 1187 stats[lcore_id].ep_nep[1] = ep_nep[1]; 1188 stats[lcore_id].fp_nfp[0] = fp_nfp[0]; 1189 stats[lcore_id].fp_nfp[1] = fp_nfp[1]; 1190 stats[lcore_id].br = br; 1191 rte_spinlock_unlock(&stats[lcore_id].telemetry_lock); 1192 } 1193 } 1194 1195 return 0; 1196 } 1197 /* main processing loop */ 1198 static int 1199 main_empty_poll_loop(__rte_unused void *dummy) 1200 { 1201 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1202 unsigned int lcore_id; 1203 uint64_t prev_tsc, diff_tsc, cur_tsc; 1204 int i, j, nb_rx; 1205 uint8_t queueid; 1206 uint16_t portid; 1207 struct lcore_conf *qconf; 1208 struct lcore_rx_queue *rx_queue; 1209 1210 const uint64_t drain_tsc = 1211 (rte_get_tsc_hz() + US_PER_S - 1) / 1212 US_PER_S * BURST_TX_DRAIN_US; 1213 1214 prev_tsc = 0; 1215 1216 lcore_id = rte_lcore_id(); 1217 qconf = &lcore_conf[lcore_id]; 1218 1219 if (qconf->n_rx_queue == 0) { 1220 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1221 lcore_id); 1222 return 0; 1223 } 1224 1225 for (i = 0; i < qconf->n_rx_queue; i++) { 1226 portid = qconf->rx_queue_list[i].port_id; 1227 queueid = qconf->rx_queue_list[i].queue_id; 1228 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1229 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1230 } 1231 1232 while (!is_done()) { 1233 stats[lcore_id].nb_iteration_looped++; 1234 1235 cur_tsc = rte_rdtsc(); 1236 /* 1237 * TX burst queue drain 1238 */ 1239 diff_tsc = cur_tsc - prev_tsc; 1240 if (unlikely(diff_tsc > drain_tsc)) { 1241 for (i = 0; i < qconf->n_tx_port; ++i) { 1242 portid = qconf->tx_port_id[i]; 1243 rte_eth_tx_buffer_flush(portid, 1244 qconf->tx_queue_id[portid], 1245 qconf->tx_buffer[portid]); 1246 } 1247 prev_tsc = cur_tsc; 1248 } 1249 1250 /* 1251 * Read packet from RX queues 1252 */ 1253 for (i = 0; i < qconf->n_rx_queue; ++i) { 1254 rx_queue = &(qconf->rx_queue_list[i]); 1255 rx_queue->idle_hint = 0; 1256 portid = rx_queue->port_id; 1257 queueid = rx_queue->queue_id; 1258 1259 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1260 MAX_PKT_BURST); 1261 1262 stats[lcore_id].nb_rx_processed += nb_rx; 1263 1264 if (nb_rx == 0) { 1265 1266 rte_power_empty_poll_stat_update(lcore_id); 1267 1268 continue; 1269 } else { 1270 rte_power_poll_stat_update(lcore_id, nb_rx); 1271 } 1272 1273 1274 /* Prefetch first packets */ 1275 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1276 rte_prefetch0(rte_pktmbuf_mtod( 1277 pkts_burst[j], void *)); 1278 } 1279 1280 /* Prefetch and forward already prefetched packets */ 1281 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1282 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1283 j + PREFETCH_OFFSET], 1284 void *)); 1285 l3fwd_simple_forward(pkts_burst[j], portid, 1286 qconf); 1287 } 1288 1289 /* Forward remaining prefetched packets */ 1290 for (; j < nb_rx; j++) { 1291 l3fwd_simple_forward(pkts_burst[j], portid, 1292 qconf); 1293 } 1294 1295 } 1296 1297 } 1298 1299 return 0; 1300 } 1301 /* main processing loop */ 1302 static int 1303 main_legacy_loop(__rte_unused void *dummy) 1304 { 1305 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1306 unsigned lcore_id; 1307 uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz; 1308 uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power; 1309 int i, j, nb_rx; 1310 uint8_t queueid; 1311 uint16_t portid; 1312 struct lcore_conf *qconf; 1313 struct lcore_rx_queue *rx_queue; 1314 enum freq_scale_hint_t lcore_scaleup_hint; 1315 uint32_t lcore_rx_idle_count = 0; 1316 uint32_t 
lcore_idle_hint = 0; 1317 int intr_en = 0; 1318 1319 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; 1320 1321 prev_tsc = 0; 1322 hz = rte_get_timer_hz(); 1323 tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND; 1324 1325 lcore_id = rte_lcore_id(); 1326 qconf = &lcore_conf[lcore_id]; 1327 1328 if (qconf->n_rx_queue == 0) { 1329 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); 1330 return 0; 1331 } 1332 1333 RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id); 1334 1335 for (i = 0; i < qconf->n_rx_queue; i++) { 1336 portid = qconf->rx_queue_list[i].port_id; 1337 queueid = qconf->rx_queue_list[i].queue_id; 1338 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1339 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1340 } 1341 1342 /* add into event wait list */ 1343 if (event_register(qconf) == 0) 1344 intr_en = 1; 1345 else 1346 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 1347 1348 while (!is_done()) { 1349 stats[lcore_id].nb_iteration_looped++; 1350 1351 cur_tsc = rte_rdtsc(); 1352 cur_tsc_power = cur_tsc; 1353 1354 /* 1355 * TX burst queue drain 1356 */ 1357 diff_tsc = cur_tsc - prev_tsc; 1358 if (unlikely(diff_tsc > drain_tsc)) { 1359 for (i = 0; i < qconf->n_tx_port; ++i) { 1360 portid = qconf->tx_port_id[i]; 1361 rte_eth_tx_buffer_flush(portid, 1362 qconf->tx_queue_id[portid], 1363 qconf->tx_buffer[portid]); 1364 } 1365 prev_tsc = cur_tsc; 1366 } 1367 1368 diff_tsc_power = cur_tsc_power - prev_tsc_power; 1369 if (diff_tsc_power > tim_res_tsc) { 1370 rte_timer_manage(); 1371 prev_tsc_power = cur_tsc_power; 1372 } 1373 1374 start_rx: 1375 /* 1376 * Read packet from RX queues 1377 */ 1378 lcore_scaleup_hint = FREQ_CURRENT; 1379 lcore_rx_idle_count = 0; 1380 for (i = 0; i < qconf->n_rx_queue; ++i) { 1381 rx_queue = &(qconf->rx_queue_list[i]); 1382 rx_queue->idle_hint = 0; 1383 portid = rx_queue->port_id; 1384 queueid = rx_queue->queue_id; 1385 1386 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1387 MAX_PKT_BURST); 1388 1389 stats[lcore_id].nb_rx_processed += nb_rx; 1390 if (unlikely(nb_rx == 0)) { 1391 /** 1392 * no packet received from rx queue, try to 1393 * sleep for a while forcing CPU enter deeper 1394 * C states. 1395 */ 1396 rx_queue->zero_rx_packet_count++; 1397 1398 if (rx_queue->zero_rx_packet_count <= 1399 MIN_ZERO_POLL_COUNT) 1400 continue; 1401 1402 rx_queue->idle_hint = power_idle_heuristic(\ 1403 rx_queue->zero_rx_packet_count); 1404 lcore_rx_idle_count++; 1405 } else { 1406 rx_queue->zero_rx_packet_count = 0; 1407 1408 /** 1409 * do not scale up frequency immediately as 1410 * user to kernel space communication is costly 1411 * which might impact packet I/O for received 1412 * packets. 
1413 */ 1414 rx_queue->freq_up_hint = 1415 power_freq_scaleup_heuristic(lcore_id, 1416 portid, queueid); 1417 } 1418 1419 /* Prefetch first packets */ 1420 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1421 rte_prefetch0(rte_pktmbuf_mtod( 1422 pkts_burst[j], void *)); 1423 } 1424 1425 /* Prefetch and forward already prefetched packets */ 1426 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1427 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1428 j + PREFETCH_OFFSET], void *)); 1429 l3fwd_simple_forward(pkts_burst[j], portid, 1430 qconf); 1431 } 1432 1433 /* Forward remaining prefetched packets */ 1434 for (; j < nb_rx; j++) { 1435 l3fwd_simple_forward(pkts_burst[j], portid, 1436 qconf); 1437 } 1438 } 1439 1440 if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) { 1441 for (i = 1, lcore_scaleup_hint = 1442 qconf->rx_queue_list[0].freq_up_hint; 1443 i < qconf->n_rx_queue; ++i) { 1444 rx_queue = &(qconf->rx_queue_list[i]); 1445 if (rx_queue->freq_up_hint > 1446 lcore_scaleup_hint) 1447 lcore_scaleup_hint = 1448 rx_queue->freq_up_hint; 1449 } 1450 1451 if (lcore_scaleup_hint == FREQ_HIGHEST) { 1452 if (rte_power_freq_max) 1453 rte_power_freq_max(lcore_id); 1454 } else if (lcore_scaleup_hint == FREQ_HIGHER) { 1455 if (rte_power_freq_up) 1456 rte_power_freq_up(lcore_id); 1457 } 1458 } else { 1459 /** 1460 * All Rx queues empty in recent consecutive polls, 1461 * sleep in a conservative manner, meaning sleep as 1462 * less as possible. 1463 */ 1464 for (i = 1, lcore_idle_hint = 1465 qconf->rx_queue_list[0].idle_hint; 1466 i < qconf->n_rx_queue; ++i) { 1467 rx_queue = &(qconf->rx_queue_list[i]); 1468 if (rx_queue->idle_hint < lcore_idle_hint) 1469 lcore_idle_hint = rx_queue->idle_hint; 1470 } 1471 1472 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1473 /** 1474 * execute "pause" instruction to avoid context 1475 * switch which generally take hundred of 1476 * microseconds for short sleep. 
				 */
				rte_delay_us(lcore_idle_hint);
			else {
				/* suspend until rx interrupt triggers */
				if (intr_en) {
					turn_on_off_intr(qconf, 1);
					sleep_until_rx_interrupt(
							qconf->n_rx_queue,
							lcore_id);
					turn_on_off_intr(qconf, 0);
					/* start receiving packets immediately */
					if (likely(!is_done()))
						goto start_rx;
				}
			}
			stats[lcore_id].sleep_time += lcore_idle_hint;
		}
	}

	return 0;
}

static int
check_lcore_params(void)
{
	uint8_t queue, lcore;
	uint16_t i;
	int socketid;

	for (i = 0; i < nb_lcore_params; ++i) {
		queue = lcore_params[i].queue_id;
		if (queue >= MAX_RX_QUEUE_PER_PORT) {
			printf("invalid queue number: %hhu\n", queue);
			return -1;
		}
		lcore = lcore_params[i].lcore_id;
		if (!rte_lcore_is_enabled(lcore)) {
			printf("error: lcore %hhu is not enabled in lcore "
							"mask\n", lcore);
			return -1;
		}
		socketid = rte_lcore_to_socket_id(lcore);
		if ((socketid != 0) && (numa_on == 0)) {
			printf("warning: lcore %hhu is on socket %d with numa "
						"off\n", lcore, socketid);
		}
		if (app_mode == APP_MODE_TELEMETRY && lcore == rte_lcore_id()) {
			printf("cannot enable main core %d in config for telemetry mode\n",
				rte_lcore_id());
			return -1;
		}
	}
	return 0;
}

static int
check_port_config(void)
{
	unsigned portid;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		portid = lcore_params[i].port_id;
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("port %u is not enabled in port mask\n",
								portid);
			return -1;
		}
		if (!rte_eth_dev_is_valid_port(portid)) {
			printf("port %u is not present on the board\n",
								portid);
			return -1;
		}
	}
	return 0;
}

static uint8_t
get_port_n_rx_queues(const uint16_t port)
{
	int queue = -1;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		if (lcore_params[i].port_id == port &&
				lcore_params[i].queue_id > queue)
			queue = lcore_params[i].queue_id;
	}
	return (uint8_t)(++queue);
}

static int
init_lcore_rx_queues(void)
{
	uint16_t i, nb_rx_queue;
	uint8_t lcore;

	for (i = 0; i < nb_lcore_params; ++i) {
		lcore = lcore_params[i].lcore_id;
		nb_rx_queue = lcore_conf[lcore].n_rx_queue;
		if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
			printf("error: too many queues (%u) for lcore: %u\n",
				(unsigned)nb_rx_queue + 1, (unsigned)lcore);
			return -1;
		} else {
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
				lcore_params[i].port_id;
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
				lcore_params[i].queue_id;
			lcore_conf[lcore].n_rx_queue++;
		}
	}
	return 0;
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK -P"
		" [--config (port,queue,lcore)[,(port,queue,lcore)]]"
		" [--high-perf-cores CORELIST]"
		" [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index)]]"
		" [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
		" -p PORTMASK: hexadecimal bitmask of ports to configure\n"
		" -P: enable promiscuous mode\n"
		" --config (port,queue,lcore): rx queues configuration\n"
		" --high-perf-cores CORELIST: list of high performance cores\n"
		" --perf-config: similar to 
config, cores specified as indices" 1608 " for bins containing high or regular performance cores\n" 1609 " --no-numa: optional, disable numa awareness\n" 1610 " --enable-jumbo: enable jumbo frame" 1611 " which max packet len is PKTLEN in decimal (64-9600)\n" 1612 " --parse-ptype: parse packet type by software\n" 1613 " --legacy: use legacy interrupt-based scaling\n" 1614 " --empty-poll: enable empty poll detection" 1615 " follow (training_flag, high_threshold, med_threshold)\n" 1616 " --telemetry: enable telemetry mode, to update" 1617 " empty polls, full polls, and core busyness to telemetry\n" 1618 " --interrupt-only: enable interrupt-only mode\n" 1619 " --pmd-mgmt MODE: enable PMD power management mode. " 1620 "Currently supported modes: monitor, pause, scale\n", 1621 prgname); 1622 } 1623 1624 static int parse_max_pkt_len(const char *pktlen) 1625 { 1626 char *end = NULL; 1627 unsigned long len; 1628 1629 /* parse decimal string */ 1630 len = strtoul(pktlen, &end, 10); 1631 if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0')) 1632 return -1; 1633 1634 if (len == 0) 1635 return -1; 1636 1637 return len; 1638 } 1639 1640 static int 1641 parse_portmask(const char *portmask) 1642 { 1643 char *end = NULL; 1644 unsigned long pm; 1645 1646 /* parse hexadecimal string */ 1647 pm = strtoul(portmask, &end, 16); 1648 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 1649 return 0; 1650 1651 return pm; 1652 } 1653 1654 static int 1655 parse_config(const char *q_arg) 1656 { 1657 char s[256]; 1658 const char *p, *p0 = q_arg; 1659 char *end; 1660 enum fieldnames { 1661 FLD_PORT = 0, 1662 FLD_QUEUE, 1663 FLD_LCORE, 1664 _NUM_FLD 1665 }; 1666 unsigned long int_fld[_NUM_FLD]; 1667 char *str_fld[_NUM_FLD]; 1668 int i; 1669 unsigned size; 1670 1671 nb_lcore_params = 0; 1672 1673 while ((p = strchr(p0,'(')) != NULL) { 1674 ++p; 1675 if((p0 = strchr(p,')')) == NULL) 1676 return -1; 1677 1678 size = p0 - p; 1679 if(size >= sizeof(s)) 1680 return -1; 1681 1682 snprintf(s, sizeof(s), "%.*s", size, p); 1683 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != 1684 _NUM_FLD) 1685 return -1; 1686 for (i = 0; i < _NUM_FLD; i++){ 1687 errno = 0; 1688 int_fld[i] = strtoul(str_fld[i], &end, 0); 1689 if (errno != 0 || end == str_fld[i] || int_fld[i] > 1690 255) 1691 return -1; 1692 } 1693 if (nb_lcore_params >= MAX_LCORE_PARAMS) { 1694 printf("exceeded max number of lcore params: %hu\n", 1695 nb_lcore_params); 1696 return -1; 1697 } 1698 lcore_params_array[nb_lcore_params].port_id = 1699 (uint8_t)int_fld[FLD_PORT]; 1700 lcore_params_array[nb_lcore_params].queue_id = 1701 (uint8_t)int_fld[FLD_QUEUE]; 1702 lcore_params_array[nb_lcore_params].lcore_id = 1703 (uint8_t)int_fld[FLD_LCORE]; 1704 ++nb_lcore_params; 1705 } 1706 lcore_params = lcore_params_array; 1707 1708 return 0; 1709 } 1710 1711 static int 1712 parse_pmd_mgmt_config(const char *name) 1713 { 1714 #define PMD_MGMT_MONITOR "monitor" 1715 #define PMD_MGMT_PAUSE "pause" 1716 #define PMD_MGMT_SCALE "scale" 1717 1718 if (strncmp(PMD_MGMT_MONITOR, name, sizeof(PMD_MGMT_MONITOR)) == 0) { 1719 pmgmt_type = RTE_POWER_MGMT_TYPE_MONITOR; 1720 return 0; 1721 } 1722 1723 if (strncmp(PMD_MGMT_PAUSE, name, sizeof(PMD_MGMT_PAUSE)) == 0) { 1724 pmgmt_type = RTE_POWER_MGMT_TYPE_PAUSE; 1725 return 0; 1726 } 1727 1728 if (strncmp(PMD_MGMT_SCALE, name, sizeof(PMD_MGMT_SCALE)) == 0) { 1729 pmgmt_type = RTE_POWER_MGMT_TYPE_SCALE; 1730 return 0; 1731 } 1732 /* unknown PMD power management mode */ 1733 return -1; 1734 } 1735 1736 static int 1737 
parse_ep_config(const char *q_arg)
{
	char s[256];
	const char *p = q_arg;
	char *end;
	int num_arg;

	char *str_fld[3];

	int training_flag;
	int med_edpi;
	int hgh_edpi;

	ep_med_edpi = EMPTY_POLL_MED_THRESHOLD;
	ep_hgh_edpi = EMPTY_POLL_HGH_THRESHOLD;

	strlcpy(s, p, sizeof(s));

	num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ',');

	empty_poll_train = false;

	if (num_arg == 0)
		return 0;

	if (num_arg == 3) {
		training_flag = strtoul(str_fld[0], &end, 0);
		med_edpi = strtoul(str_fld[1], &end, 0);
		hgh_edpi = strtoul(str_fld[2], &end, 0);

		if (training_flag == 1)
			empty_poll_train = true;

		if (med_edpi > 0)
			ep_med_edpi = med_edpi;

		if (hgh_edpi > 0)
			ep_hgh_edpi = hgh_edpi;
	} else {
		return -1;
	}

	return 0;
}

#define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
#define CMD_LINE_OPT_LEGACY "legacy"
#define CMD_LINE_OPT_EMPTY_POLL "empty-poll"
#define CMD_LINE_OPT_INTERRUPT_ONLY "interrupt-only"
#define CMD_LINE_OPT_TELEMETRY "telemetry"
#define CMD_LINE_OPT_PMD_MGMT "pmd-mgmt"

/* Parse the argument given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
	int opt, ret;
	char **argvopt;
	int option_index;
	uint32_t limit;
	char *prgname = argv[0];
	static struct option lgopts[] = {
		{"config", 1, 0, 0},
		{"perf-config", 1, 0, 0},
		{"high-perf-cores", 1, 0, 0},
		{"no-numa", 0, 0, 0},
		{"enable-jumbo", 0, 0, 0},
		{CMD_LINE_OPT_EMPTY_POLL, 1, 0, 0},
		{CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
		{CMD_LINE_OPT_LEGACY, 0, 0, 0},
		{CMD_LINE_OPT_TELEMETRY, 0, 0, 0},
		{CMD_LINE_OPT_INTERRUPT_ONLY, 0, 0, 0},
		{CMD_LINE_OPT_PMD_MGMT, 1, 0, 0},
		{NULL, 0, 0, 0}
	};

	argvopt = argv;

	while ((opt = getopt_long(argc, argvopt, "p:l:m:h:P",
				lgopts, &option_index)) != EOF) {

		switch (opt) {
		/* portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				printf("invalid portmask\n");
				print_usage(prgname);
				return -1;
			}
			break;
		case 'P':
			printf("Promiscuous mode selected\n");
			promiscuous_on = 1;
			break;
		case 'l':
			limit = parse_max_pkt_len(optarg);
			freq_tlb[LOW] = limit;
			break;
		case 'm':
			limit = parse_max_pkt_len(optarg);
			freq_tlb[MED] = limit;
			break;
		case 'h':
			limit = parse_max_pkt_len(optarg);
			freq_tlb[HGH] = limit;
			break;
		/* long options */
		case 0:
			if (!strncmp(lgopts[option_index].name, "config", 6)) {
				ret = parse_config(optarg);
				if (ret) {
					printf("invalid config\n");
					print_usage(prgname);
					return -1;
				}
			}

			if (!strncmp(lgopts[option_index].name,
					"perf-config", 11)) {
				ret = parse_perf_config(optarg);
				if (ret) {
					printf("invalid perf-config\n");
					print_usage(prgname);
					return -1;
				}
			}

			if (!strncmp(lgopts[option_index].name,
					"high-perf-cores", 15)) {
				ret = parse_perf_core_list(optarg);
				if (ret) {
					printf("invalid high-perf-cores\n");
					print_usage(prgname);
					return -1;
				}
			}

			if (!strncmp(lgopts[option_index].name,
					"no-numa", 7)) {
				printf("numa is disabled\n");
				numa_on = 0;
			}

			if
(!strncmp(lgopts[option_index].name, 1885 CMD_LINE_OPT_LEGACY, 1886 sizeof(CMD_LINE_OPT_LEGACY))) { 1887 if (app_mode != APP_MODE_DEFAULT) { 1888 printf(" legacy mode is mutually exclusive with other modes\n"); 1889 return -1; 1890 } 1891 app_mode = APP_MODE_LEGACY; 1892 printf("legacy mode is enabled\n"); 1893 } 1894 1895 if (!strncmp(lgopts[option_index].name, 1896 CMD_LINE_OPT_EMPTY_POLL, 10)) { 1897 if (app_mode != APP_MODE_DEFAULT) { 1898 printf(" empty-poll mode is mutually exclusive with other modes\n"); 1899 return -1; 1900 } 1901 app_mode = APP_MODE_EMPTY_POLL; 1902 ret = parse_ep_config(optarg); 1903 1904 if (ret) { 1905 printf("invalid empty poll config\n"); 1906 print_usage(prgname); 1907 return -1; 1908 } 1909 printf("empty-poll is enabled\n"); 1910 } 1911 1912 if (!strncmp(lgopts[option_index].name, 1913 CMD_LINE_OPT_TELEMETRY, 1914 sizeof(CMD_LINE_OPT_TELEMETRY))) { 1915 if (app_mode != APP_MODE_DEFAULT) { 1916 printf(" telemetry mode is mutually exclusive with other modes\n"); 1917 return -1; 1918 } 1919 app_mode = APP_MODE_TELEMETRY; 1920 printf("telemetry mode is enabled\n"); 1921 } 1922 1923 if (!strncmp(lgopts[option_index].name, 1924 CMD_LINE_OPT_PMD_MGMT, 1925 sizeof(CMD_LINE_OPT_PMD_MGMT))) { 1926 if (app_mode != APP_MODE_DEFAULT) { 1927 printf(" power mgmt mode is mutually exclusive with other modes\n"); 1928 return -1; 1929 } 1930 if (parse_pmd_mgmt_config(optarg) < 0) { 1931 printf(" Invalid PMD power management mode: %s\n", 1932 optarg); 1933 return -1; 1934 } 1935 app_mode = APP_MODE_PMD_MGMT; 1936 printf("PMD power mgmt mode is enabled\n"); 1937 } 1938 if (!strncmp(lgopts[option_index].name, 1939 CMD_LINE_OPT_INTERRUPT_ONLY, 1940 sizeof(CMD_LINE_OPT_INTERRUPT_ONLY))) { 1941 if (app_mode != APP_MODE_DEFAULT) { 1942 printf(" interrupt-only mode is mutually exclusive with other modes\n"); 1943 return -1; 1944 } 1945 app_mode = APP_MODE_INTERRUPT; 1946 printf("interrupt-only mode is enabled\n"); 1947 } 1948 1949 if (!strncmp(lgopts[option_index].name, 1950 "enable-jumbo", 12)) { 1951 struct option lenopts = 1952 {"max-pkt-len", required_argument, \ 1953 0, 0}; 1954 1955 printf("jumbo frame is enabled \n"); 1956 port_conf.rxmode.offloads |= 1957 DEV_RX_OFFLOAD_JUMBO_FRAME; 1958 port_conf.txmode.offloads |= 1959 DEV_TX_OFFLOAD_MULTI_SEGS; 1960 1961 /** 1962 * if no max-pkt-len set, use the default value 1963 * RTE_ETHER_MAX_LEN 1964 */ 1965 if (0 == getopt_long(argc, argvopt, "", 1966 &lenopts, &option_index)) { 1967 ret = parse_max_pkt_len(optarg); 1968 if ((ret < 64) || 1969 (ret > MAX_JUMBO_PKT_LEN)){ 1970 printf("invalid packet " 1971 "length\n"); 1972 print_usage(prgname); 1973 return -1; 1974 } 1975 port_conf.rxmode.max_rx_pkt_len = ret; 1976 } 1977 printf("set jumbo frame " 1978 "max packet length to %u\n", 1979 (unsigned int)port_conf.rxmode.max_rx_pkt_len); 1980 } 1981 1982 if (!strncmp(lgopts[option_index].name, 1983 CMD_LINE_OPT_PARSE_PTYPE, 1984 sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { 1985 printf("soft parse-ptype is enabled\n"); 1986 parse_ptype = 1; 1987 } 1988 1989 break; 1990 1991 default: 1992 print_usage(prgname); 1993 return -1; 1994 } 1995 } 1996 1997 if (optind >= 0) 1998 argv[optind-1] = prgname; 1999 2000 ret = optind-1; 2001 optind = 1; /* reset getopt lib */ 2002 return ret; 2003 } 2004 2005 static void 2006 print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr) 2007 { 2008 char buf[RTE_ETHER_ADDR_FMT_SIZE]; 2009 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr); 2010 printf("%s%s", name, buf); 2011 } 2012 2013 #if 
(APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2014 static void 2015 setup_hash(int socketid) 2016 { 2017 struct rte_hash_parameters ipv4_l3fwd_hash_params = { 2018 .name = NULL, 2019 .entries = L3FWD_HASH_ENTRIES, 2020 .key_len = sizeof(struct ipv4_5tuple), 2021 .hash_func = DEFAULT_HASH_FUNC, 2022 .hash_func_init_val = 0, 2023 }; 2024 2025 struct rte_hash_parameters ipv6_l3fwd_hash_params = { 2026 .name = NULL, 2027 .entries = L3FWD_HASH_ENTRIES, 2028 .key_len = sizeof(struct ipv6_5tuple), 2029 .hash_func = DEFAULT_HASH_FUNC, 2030 .hash_func_init_val = 0, 2031 }; 2032 2033 unsigned i; 2034 int ret; 2035 char s[64]; 2036 2037 /* create ipv4 hash */ 2038 snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); 2039 ipv4_l3fwd_hash_params.name = s; 2040 ipv4_l3fwd_hash_params.socket_id = socketid; 2041 ipv4_l3fwd_lookup_struct[socketid] = 2042 rte_hash_create(&ipv4_l3fwd_hash_params); 2043 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 2044 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 2045 "socket %d\n", socketid); 2046 2047 /* create ipv6 hash */ 2048 snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); 2049 ipv6_l3fwd_hash_params.name = s; 2050 ipv6_l3fwd_hash_params.socket_id = socketid; 2051 ipv6_l3fwd_lookup_struct[socketid] = 2052 rte_hash_create(&ipv6_l3fwd_hash_params); 2053 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 2054 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 2055 "socket %d\n", socketid); 2056 2057 2058 /* populate the ipv4 hash */ 2059 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 2060 ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid], 2061 (void *) &ipv4_l3fwd_route_array[i].key); 2062 if (ret < 0) { 2063 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 2064 "l3fwd hash on socket %d\n", i, socketid); 2065 } 2066 ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out; 2067 printf("Hash: Adding key\n"); 2068 print_ipv4_key(ipv4_l3fwd_route_array[i].key); 2069 } 2070 2071 /* populate the ipv6 hash */ 2072 for (i = 0; i < RTE_DIM(ipv6_l3fwd_route_array); i++) { 2073 ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid], 2074 (void *) &ipv6_l3fwd_route_array[i].key); 2075 if (ret < 0) { 2076 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 2077 "l3fwd hash on socket %d\n", i, socketid); 2078 } 2079 ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out; 2080 printf("Hash: Adding key\n"); 2081 print_ipv6_key(ipv6_l3fwd_route_array[i].key); 2082 } 2083 } 2084 #endif 2085 2086 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 2087 static void 2088 setup_lpm(int socketid) 2089 { 2090 unsigned i; 2091 int ret; 2092 char s[64]; 2093 2094 /* create the LPM table */ 2095 struct rte_lpm_config lpm_ipv4_config; 2096 2097 lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; 2098 lpm_ipv4_config.number_tbl8s = 256; 2099 lpm_ipv4_config.flags = 0; 2100 2101 snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); 2102 ipv4_l3fwd_lookup_struct[socketid] = 2103 rte_lpm_create(s, socketid, &lpm_ipv4_config); 2104 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 2105 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 2106 " on socket %d\n", socketid); 2107 2108 /* populate the LPM table */ 2109 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 2110 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], 2111 ipv4_l3fwd_route_array[i].ip, 2112 ipv4_l3fwd_route_array[i].depth, 2113 ipv4_l3fwd_route_array[i].if_out); 2114 2115 if (ret < 0) { 2116 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 2117 "l3fwd 
LPM table on socket %d\n", 2118 i, socketid); 2119 } 2120 2121 printf("LPM: Adding route 0x%08x / %d (%d)\n", 2122 (unsigned)ipv4_l3fwd_route_array[i].ip, 2123 ipv4_l3fwd_route_array[i].depth, 2124 ipv4_l3fwd_route_array[i].if_out); 2125 } 2126 } 2127 #endif 2128 2129 static int 2130 init_mem(unsigned nb_mbuf) 2131 { 2132 struct lcore_conf *qconf; 2133 int socketid; 2134 unsigned lcore_id; 2135 char s[64]; 2136 2137 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2138 if (rte_lcore_is_enabled(lcore_id) == 0) 2139 continue; 2140 2141 if (numa_on) 2142 socketid = rte_lcore_to_socket_id(lcore_id); 2143 else 2144 socketid = 0; 2145 2146 if (socketid >= NB_SOCKETS) { 2147 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is " 2148 "out of range %d\n", socketid, 2149 lcore_id, NB_SOCKETS); 2150 } 2151 if (pktmbuf_pool[socketid] == NULL) { 2152 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 2153 pktmbuf_pool[socketid] = 2154 rte_pktmbuf_pool_create(s, nb_mbuf, 2155 MEMPOOL_CACHE_SIZE, 0, 2156 RTE_MBUF_DEFAULT_BUF_SIZE, 2157 socketid); 2158 if (pktmbuf_pool[socketid] == NULL) 2159 rte_exit(EXIT_FAILURE, 2160 "Cannot init mbuf pool on socket %d\n", 2161 socketid); 2162 else 2163 printf("Allocated mbuf pool on socket %d\n", 2164 socketid); 2165 2166 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 2167 setup_lpm(socketid); 2168 #else 2169 setup_hash(socketid); 2170 #endif 2171 } 2172 qconf = &lcore_conf[lcore_id]; 2173 qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid]; 2174 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2175 qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid]; 2176 #endif 2177 } 2178 return 0; 2179 } 2180 2181 /* Check the link status of all ports in up to 9s, and print them finally */ 2182 static void 2183 check_all_ports_link_status(uint32_t port_mask) 2184 { 2185 #define CHECK_INTERVAL 100 /* 100ms */ 2186 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 2187 uint8_t count, all_ports_up, print_flag = 0; 2188 uint16_t portid; 2189 struct rte_eth_link link; 2190 int ret; 2191 char link_status_text[RTE_ETH_LINK_MAX_STR_LEN]; 2192 2193 printf("\nChecking link status"); 2194 fflush(stdout); 2195 for (count = 0; count <= MAX_CHECK_TIME; count++) { 2196 all_ports_up = 1; 2197 RTE_ETH_FOREACH_DEV(portid) { 2198 if ((port_mask & (1 << portid)) == 0) 2199 continue; 2200 memset(&link, 0, sizeof(link)); 2201 ret = rte_eth_link_get_nowait(portid, &link); 2202 if (ret < 0) { 2203 all_ports_up = 0; 2204 if (print_flag == 1) 2205 printf("Port %u link get failed: %s\n", 2206 portid, rte_strerror(-ret)); 2207 continue; 2208 } 2209 /* print link status if flag set */ 2210 if (print_flag == 1) { 2211 rte_eth_link_to_str(link_status_text, 2212 sizeof(link_status_text), &link); 2213 printf("Port %d %s\n", portid, 2214 link_status_text); 2215 continue; 2216 } 2217 /* clear all_ports_up flag if any link down */ 2218 if (link.link_status == ETH_LINK_DOWN) { 2219 all_ports_up = 0; 2220 break; 2221 } 2222 } 2223 /* after finally printing all link status, get out */ 2224 if (print_flag == 1) 2225 break; 2226 2227 if (all_ports_up == 0) { 2228 printf("."); 2229 fflush(stdout); 2230 rte_delay_ms(CHECK_INTERVAL); 2231 } 2232 2233 /* set the print_flag if all ports up or timeout */ 2234 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 2235 print_flag = 1; 2236 printf("done\n"); 2237 } 2238 } 2239 } 2240 2241 static int check_ptype(uint16_t portid) 2242 { 2243 int i, ret; 2244 int ptype_l3_ipv4 = 0; 2245 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2246 int 
ptype_l3_ipv6 = 0;
#endif
	uint32_t ptype_mask = RTE_PTYPE_L3_MASK;

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
	if (ret <= 0)
		return 0;

	uint32_t ptypes[ret];

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
	for (i = 0; i < ret; ++i) {
		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
			ptype_l3_ipv4 = 1;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
			ptype_l3_ipv6 = 1;
#endif
	}

	if (ptype_l3_ipv4 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	if (ptype_l3_ipv6 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
	if (ptype_l3_ipv4)
#else /* APP_LOOKUP_EXACT_MATCH */
	if (ptype_l3_ipv4 && ptype_l3_ipv6)
#endif
		return 1;

	return 0;
}

static int
init_power_library(void)
{
	enum power_management_env env;
	unsigned int lcore_id;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* init power management library */
		ret = rte_power_init(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library initialization failed on core %u\n",
				lcore_id);
			return ret;
		}
		/* we're not supporting the VM channel mode */
		env = rte_power_get_env();
		if (env != PM_ENV_ACPI_CPUFREQ &&
				env != PM_ENV_PSTATE_CPUFREQ) {
			RTE_LOG(ERR, POWER,
				"Only ACPI and PSTATE modes are supported\n");
			return -1;
		}
	}
	return ret;
}

static int
deinit_power_library(void)
{
	unsigned int lcore_id;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* deinit power management library */
		ret = rte_power_exit(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library deinitialization failed on core %u\n",
				lcore_id);
			return ret;
		}
	}
	return ret;
}

static void
get_current_stat_values(uint64_t *values)
{
	unsigned int lcore_id = rte_lcore_id();
	struct lcore_conf *qconf;
	uint64_t app_eps = 0, app_fps = 0, app_br = 0;
	uint64_t count = 0;

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		qconf = &lcore_conf[lcore_id];
		if (qconf->n_rx_queue == 0)
			continue;
		count++;
		rte_spinlock_lock(&stats[lcore_id].telemetry_lock);
		app_eps += stats[lcore_id].ep_nep[1];
		app_fps += stats[lcore_id].fp_nfp[1];
		app_br += stats[lcore_id].br;
		rte_spinlock_unlock(&stats[lcore_id].telemetry_lock);
	}

	if (count > 0) {
		values[0] = app_eps/count;
		values[1] = app_fps/count;
		values[2] = app_br/count;
	} else
		memset(values, 0, sizeof(uint64_t) * NUM_TELSTATS);
}

static void
update_telemetry(__rte_unused struct rte_timer *tim,
		__rte_unused void *arg)
{
	int ret;
	uint64_t values[NUM_TELSTATS] = {0};

	get_current_stat_values(values);
	ret = rte_metrics_update_values(RTE_METRICS_GLOBAL, telstats_index,
					values, RTE_DIM(values));
	if (ret < 0)
		RTE_LOG(WARNING, POWER, "failed to update metrics\n");
}

static int
handle_app_stats(const char *cmd __rte_unused,
		const char *params __rte_unused,
		struct rte_tel_data *d)
{
	uint64_t values[NUM_TELSTATS] = {0};
	uint32_t i;

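	/* Build the telemetry response: one dictionary entry per stat name,
	 * filled with the latest values averaged over the active worker
	 * lcores by get_current_stat_values().
	 */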
rte_tel_data_start_dict(d); 2384 get_current_stat_values(values); 2385 for (i = 0; i < NUM_TELSTATS; i++) 2386 rte_tel_data_add_dict_u64(d, telstats_strings[i].name, 2387 values[i]); 2388 return 0; 2389 } 2390 2391 static void 2392 telemetry_setup_timer(void) 2393 { 2394 int lcore_id = rte_lcore_id(); 2395 uint64_t hz = rte_get_timer_hz(); 2396 uint64_t ticks; 2397 2398 ticks = hz / TELEMETRY_INTERVALS_PER_SEC; 2399 rte_timer_reset_sync(&telemetry_timer, 2400 ticks, 2401 PERIODICAL, 2402 lcore_id, 2403 update_telemetry, 2404 NULL); 2405 } 2406 static void 2407 empty_poll_setup_timer(void) 2408 { 2409 int lcore_id = rte_lcore_id(); 2410 uint64_t hz = rte_get_timer_hz(); 2411 2412 struct ep_params *ep_ptr = ep_params; 2413 2414 ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND; 2415 2416 rte_timer_reset_sync(&ep_ptr->timer0, 2417 ep_ptr->interval_ticks, 2418 PERIODICAL, 2419 lcore_id, 2420 rte_empty_poll_detection, 2421 (void *)ep_ptr); 2422 2423 } 2424 static int 2425 launch_timer(unsigned int lcore_id) 2426 { 2427 int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms; 2428 2429 RTE_SET_USED(lcore_id); 2430 2431 2432 if (rte_get_main_lcore() != lcore_id) { 2433 rte_panic("timer on lcore:%d which is not main core:%d\n", 2434 lcore_id, 2435 rte_get_main_lcore()); 2436 } 2437 2438 RTE_LOG(INFO, POWER, "Bring up the Timer\n"); 2439 2440 if (app_mode == APP_MODE_EMPTY_POLL) 2441 empty_poll_setup_timer(); 2442 else 2443 telemetry_setup_timer(); 2444 2445 cycles_10ms = rte_get_timer_hz() / 100; 2446 2447 while (!is_done()) { 2448 cur_tsc = rte_rdtsc(); 2449 diff_tsc = cur_tsc - prev_tsc; 2450 if (diff_tsc > cycles_10ms) { 2451 rte_timer_manage(); 2452 prev_tsc = cur_tsc; 2453 cycles_10ms = rte_get_timer_hz() / 100; 2454 } 2455 } 2456 2457 RTE_LOG(INFO, POWER, "Timer_subsystem is done\n"); 2458 2459 return 0; 2460 } 2461 2462 static int 2463 autodetect_mode(void) 2464 { 2465 RTE_LOG(NOTICE, L3FWD_POWER, "Operating mode not specified, probing frequency scaling support...\n"); 2466 2467 /* 2468 * Empty poll and telemetry modes have to be specifically requested to 2469 * be enabled, but we can auto-detect between interrupt mode with or 2470 * without frequency scaling. Both ACPI and pstate can be used. 
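 * When either cpufreq driver reports support, the legacy mode (per-lcore
 * frequency scaling) is selected below; otherwise the application falls
 * back to the interrupt-only mode.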
2471 */ 2472 if (rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ)) 2473 return APP_MODE_LEGACY; 2474 if (rte_power_check_env_supported(PM_ENV_PSTATE_CPUFREQ)) 2475 return APP_MODE_LEGACY; 2476 2477 RTE_LOG(NOTICE, L3FWD_POWER, "Frequency scaling not supported, selecting interrupt-only mode\n"); 2478 2479 return APP_MODE_INTERRUPT; 2480 } 2481 2482 static const char * 2483 mode_to_str(enum appmode mode) 2484 { 2485 switch (mode) { 2486 case APP_MODE_LEGACY: 2487 return "legacy"; 2488 case APP_MODE_EMPTY_POLL: 2489 return "empty poll"; 2490 case APP_MODE_TELEMETRY: 2491 return "telemetry"; 2492 case APP_MODE_INTERRUPT: 2493 return "interrupt-only"; 2494 case APP_MODE_PMD_MGMT: 2495 return "pmd mgmt"; 2496 default: 2497 return "invalid"; 2498 } 2499 } 2500 2501 int 2502 main(int argc, char **argv) 2503 { 2504 struct lcore_conf *qconf; 2505 struct rte_eth_dev_info dev_info; 2506 struct rte_eth_txconf *txconf; 2507 int ret; 2508 uint16_t nb_ports; 2509 uint16_t queueid; 2510 unsigned lcore_id; 2511 uint64_t hz; 2512 uint32_t n_tx_queue, nb_lcores; 2513 uint32_t dev_rxq_num, dev_txq_num; 2514 uint8_t nb_rx_queue, queue, socketid; 2515 uint16_t portid; 2516 const char *ptr_strings[NUM_TELSTATS]; 2517 2518 /* catch SIGINT and restore cpufreq governor to ondemand */ 2519 signal(SIGINT, signal_exit_now); 2520 2521 /* init EAL */ 2522 ret = rte_eal_init(argc, argv); 2523 if (ret < 0) 2524 rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); 2525 argc -= ret; 2526 argv += ret; 2527 2528 /* init RTE timer library to be used late */ 2529 rte_timer_subsystem_init(); 2530 2531 /* parse application arguments (after the EAL ones) */ 2532 ret = parse_args(argc, argv); 2533 if (ret < 0) 2534 rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); 2535 2536 if (app_mode == APP_MODE_DEFAULT) 2537 app_mode = autodetect_mode(); 2538 2539 RTE_LOG(INFO, L3FWD_POWER, "Selected operation mode: %s\n", 2540 mode_to_str(app_mode)); 2541 2542 /* only legacy and empty poll mode rely on power library */ 2543 if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) && 2544 init_power_library()) 2545 rte_exit(EXIT_FAILURE, "init_power_library failed\n"); 2546 2547 if (update_lcore_params() < 0) 2548 rte_exit(EXIT_FAILURE, "update_lcore_params failed\n"); 2549 2550 if (check_lcore_params() < 0) 2551 rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); 2552 2553 ret = init_lcore_rx_queues(); 2554 if (ret < 0) 2555 rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n"); 2556 2557 nb_ports = rte_eth_dev_count_avail(); 2558 2559 if (check_port_config() < 0) 2560 rte_exit(EXIT_FAILURE, "check_port_config failed\n"); 2561 2562 nb_lcores = rte_lcore_count(); 2563 2564 /* initialize all ports */ 2565 RTE_ETH_FOREACH_DEV(portid) { 2566 struct rte_eth_conf local_port_conf = port_conf; 2567 /* not all app modes need interrupts */ 2568 bool need_intr = app_mode == APP_MODE_LEGACY || 2569 app_mode == APP_MODE_INTERRUPT; 2570 2571 /* skip ports that are not enabled */ 2572 if ((enabled_port_mask & (1 << portid)) == 0) { 2573 printf("\nSkipping disabled port %d\n", portid); 2574 continue; 2575 } 2576 2577 /* init port */ 2578 printf("Initializing port %d ... 
", portid ); 2579 fflush(stdout); 2580 2581 ret = rte_eth_dev_info_get(portid, &dev_info); 2582 if (ret != 0) 2583 rte_exit(EXIT_FAILURE, 2584 "Error during getting device (port %u) info: %s\n", 2585 portid, strerror(-ret)); 2586 2587 dev_rxq_num = dev_info.max_rx_queues; 2588 dev_txq_num = dev_info.max_tx_queues; 2589 2590 nb_rx_queue = get_port_n_rx_queues(portid); 2591 if (nb_rx_queue > dev_rxq_num) 2592 rte_exit(EXIT_FAILURE, 2593 "Cannot configure not existed rxq: " 2594 "port=%d\n", portid); 2595 2596 n_tx_queue = nb_lcores; 2597 if (n_tx_queue > dev_txq_num) 2598 n_tx_queue = dev_txq_num; 2599 printf("Creating queues: nb_rxq=%d nb_txq=%u... ", 2600 nb_rx_queue, (unsigned)n_tx_queue ); 2601 /* If number of Rx queue is 0, no need to enable Rx interrupt */ 2602 if (nb_rx_queue == 0) 2603 need_intr = false; 2604 2605 if (need_intr) 2606 local_port_conf.intr_conf.rxq = 1; 2607 2608 ret = rte_eth_dev_info_get(portid, &dev_info); 2609 if (ret != 0) 2610 rte_exit(EXIT_FAILURE, 2611 "Error during getting device (port %u) info: %s\n", 2612 portid, strerror(-ret)); 2613 2614 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 2615 local_port_conf.txmode.offloads |= 2616 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 2617 2618 local_port_conf.rx_adv_conf.rss_conf.rss_hf &= 2619 dev_info.flow_type_rss_offloads; 2620 if (local_port_conf.rx_adv_conf.rss_conf.rss_hf != 2621 port_conf.rx_adv_conf.rss_conf.rss_hf) { 2622 printf("Port %u modified RSS hash function based on hardware support," 2623 "requested:%#"PRIx64" configured:%#"PRIx64"\n", 2624 portid, 2625 port_conf.rx_adv_conf.rss_conf.rss_hf, 2626 local_port_conf.rx_adv_conf.rss_conf.rss_hf); 2627 } 2628 2629 ret = rte_eth_dev_configure(portid, nb_rx_queue, 2630 (uint16_t)n_tx_queue, &local_port_conf); 2631 if (ret < 0) 2632 rte_exit(EXIT_FAILURE, "Cannot configure device: " 2633 "err=%d, port=%d\n", ret, portid); 2634 2635 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, 2636 &nb_txd); 2637 if (ret < 0) 2638 rte_exit(EXIT_FAILURE, 2639 "Cannot adjust number of descriptors: err=%d, port=%d\n", 2640 ret, portid); 2641 2642 ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 2643 if (ret < 0) 2644 rte_exit(EXIT_FAILURE, 2645 "Cannot get MAC address: err=%d, port=%d\n", 2646 ret, portid); 2647 2648 print_ethaddr(" Address:", &ports_eth_addr[portid]); 2649 printf(", "); 2650 2651 /* init memory */ 2652 ret = init_mem(NB_MBUF); 2653 if (ret < 0) 2654 rte_exit(EXIT_FAILURE, "init_mem failed\n"); 2655 2656 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2657 if (rte_lcore_is_enabled(lcore_id) == 0) 2658 continue; 2659 2660 /* Initialize TX buffers */ 2661 qconf = &lcore_conf[lcore_id]; 2662 qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", 2663 RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, 2664 rte_eth_dev_socket_id(portid)); 2665 if (qconf->tx_buffer[portid] == NULL) 2666 rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n", 2667 portid); 2668 2669 rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST); 2670 } 2671 2672 /* init one TX queue per couple (lcore,port) */ 2673 queueid = 0; 2674 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2675 if (rte_lcore_is_enabled(lcore_id) == 0) 2676 continue; 2677 2678 if (queueid >= dev_txq_num) 2679 continue; 2680 2681 if (numa_on) 2682 socketid = \ 2683 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2684 else 2685 socketid = 0; 2686 2687 printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); 2688 fflush(stdout); 2689 2690 txconf = &dev_info.default_txconf; 
2691 txconf->offloads = local_port_conf.txmode.offloads; 2692 ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, 2693 socketid, txconf); 2694 if (ret < 0) 2695 rte_exit(EXIT_FAILURE, 2696 "rte_eth_tx_queue_setup: err=%d, " 2697 "port=%d\n", ret, portid); 2698 2699 qconf = &lcore_conf[lcore_id]; 2700 qconf->tx_queue_id[portid] = queueid; 2701 queueid++; 2702 2703 qconf->tx_port_id[qconf->n_tx_port] = portid; 2704 qconf->n_tx_port++; 2705 } 2706 printf("\n"); 2707 } 2708 2709 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2710 if (rte_lcore_is_enabled(lcore_id) == 0) 2711 continue; 2712 2713 if (app_mode == APP_MODE_LEGACY) { 2714 /* init timer structures for each enabled lcore */ 2715 rte_timer_init(&power_timers[lcore_id]); 2716 hz = rte_get_timer_hz(); 2717 rte_timer_reset(&power_timers[lcore_id], 2718 hz/TIMER_NUMBER_PER_SECOND, 2719 SINGLE, lcore_id, 2720 power_timer_cb, NULL); 2721 } 2722 qconf = &lcore_conf[lcore_id]; 2723 printf("\nInitializing rx queues on lcore %u ... ", lcore_id ); 2724 fflush(stdout); 2725 2726 /* PMD power management mode can only do 1 queue per core */ 2727 if (app_mode == APP_MODE_PMD_MGMT && qconf->n_rx_queue > 1) { 2728 rte_exit(EXIT_FAILURE, 2729 "In PMD power management mode, only one queue per lcore is allowed\n"); 2730 } 2731 2732 /* init RX queues */ 2733 for(queue = 0; queue < qconf->n_rx_queue; ++queue) { 2734 struct rte_eth_rxconf rxq_conf; 2735 2736 portid = qconf->rx_queue_list[queue].port_id; 2737 queueid = qconf->rx_queue_list[queue].queue_id; 2738 2739 if (numa_on) 2740 socketid = \ 2741 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2742 else 2743 socketid = 0; 2744 2745 printf("rxq=%d,%d,%d ", portid, queueid, socketid); 2746 fflush(stdout); 2747 2748 ret = rte_eth_dev_info_get(portid, &dev_info); 2749 if (ret != 0) 2750 rte_exit(EXIT_FAILURE, 2751 "Error during getting device (port %u) info: %s\n", 2752 portid, strerror(-ret)); 2753 2754 rxq_conf = dev_info.default_rxconf; 2755 rxq_conf.offloads = port_conf.rxmode.offloads; 2756 ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, 2757 socketid, &rxq_conf, 2758 pktmbuf_pool[socketid]); 2759 if (ret < 0) 2760 rte_exit(EXIT_FAILURE, 2761 "rte_eth_rx_queue_setup: err=%d, " 2762 "port=%d\n", ret, portid); 2763 2764 if (parse_ptype) { 2765 if (add_cb_parse_ptype(portid, queueid) < 0) 2766 rte_exit(EXIT_FAILURE, 2767 "Fail to add ptype cb\n"); 2768 } 2769 2770 if (app_mode == APP_MODE_PMD_MGMT) { 2771 ret = rte_power_ethdev_pmgmt_queue_enable( 2772 lcore_id, portid, queueid, 2773 pmgmt_type); 2774 if (ret < 0) 2775 rte_exit(EXIT_FAILURE, 2776 "rte_power_ethdev_pmgmt_queue_enable: err=%d, port=%d\n", 2777 ret, portid); 2778 } 2779 } 2780 } 2781 2782 printf("\n"); 2783 2784 /* start ports */ 2785 RTE_ETH_FOREACH_DEV(portid) { 2786 if ((enabled_port_mask & (1 << portid)) == 0) { 2787 continue; 2788 } 2789 /* Start device */ 2790 ret = rte_eth_dev_start(portid); 2791 if (ret < 0) 2792 rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, " 2793 "port=%d\n", ret, portid); 2794 /* 2795 * If enabled, put device in promiscuous mode. 2796 * This allows IO forwarding mode to forward packets 2797 * to itself through 2 cross-connected ports of the 2798 * target machine. 
2799 */ 2800 if (promiscuous_on) { 2801 ret = rte_eth_promiscuous_enable(portid); 2802 if (ret != 0) 2803 rte_exit(EXIT_FAILURE, 2804 "rte_eth_promiscuous_enable: err=%s, port=%u\n", 2805 rte_strerror(-ret), portid); 2806 } 2807 /* initialize spinlock for each port */ 2808 rte_spinlock_init(&(locks[portid])); 2809 2810 if (!parse_ptype) 2811 if (!check_ptype(portid)) 2812 rte_exit(EXIT_FAILURE, 2813 "PMD can not provide needed ptypes\n"); 2814 } 2815 2816 check_all_ports_link_status(enabled_port_mask); 2817 2818 if (app_mode == APP_MODE_EMPTY_POLL) { 2819 2820 if (empty_poll_train) { 2821 policy.state = TRAINING; 2822 } else { 2823 policy.state = MED_NORMAL; 2824 policy.med_base_edpi = ep_med_edpi; 2825 policy.hgh_base_edpi = ep_hgh_edpi; 2826 } 2827 2828 ret = rte_power_empty_poll_stat_init(&ep_params, 2829 freq_tlb, 2830 &policy); 2831 if (ret < 0) 2832 rte_exit(EXIT_FAILURE, "empty poll init failed"); 2833 } 2834 2835 2836 /* launch per-lcore init on every lcore */ 2837 if (app_mode == APP_MODE_LEGACY) { 2838 rte_eal_mp_remote_launch(main_legacy_loop, NULL, CALL_MAIN); 2839 } else if (app_mode == APP_MODE_EMPTY_POLL) { 2840 empty_poll_stop = false; 2841 rte_eal_mp_remote_launch(main_empty_poll_loop, NULL, 2842 SKIP_MAIN); 2843 } else if (app_mode == APP_MODE_TELEMETRY) { 2844 unsigned int i; 2845 2846 /* Init metrics library */ 2847 rte_metrics_init(rte_socket_id()); 2848 /** Register stats with metrics library */ 2849 for (i = 0; i < NUM_TELSTATS; i++) 2850 ptr_strings[i] = telstats_strings[i].name; 2851 2852 ret = rte_metrics_reg_names(ptr_strings, NUM_TELSTATS); 2853 if (ret >= 0) 2854 telstats_index = ret; 2855 else 2856 rte_exit(EXIT_FAILURE, "failed to register metrics names"); 2857 2858 RTE_LCORE_FOREACH_WORKER(lcore_id) { 2859 rte_spinlock_init(&stats[lcore_id].telemetry_lock); 2860 } 2861 rte_timer_init(&telemetry_timer); 2862 rte_telemetry_register_cmd("/l3fwd-power/stats", 2863 handle_app_stats, 2864 "Returns global power stats. 
Parameters: None"); 2865 rte_eal_mp_remote_launch(main_telemetry_loop, NULL, 2866 SKIP_MAIN); 2867 } else if (app_mode == APP_MODE_INTERRUPT) { 2868 rte_eal_mp_remote_launch(main_intr_loop, NULL, CALL_MAIN); 2869 } else if (app_mode == APP_MODE_PMD_MGMT) { 2870 /* reuse telemetry loop for PMD power management mode */ 2871 rte_eal_mp_remote_launch(main_telemetry_loop, NULL, CALL_MAIN); 2872 } 2873 2874 if (app_mode == APP_MODE_EMPTY_POLL || app_mode == APP_MODE_TELEMETRY) 2875 launch_timer(rte_lcore_id()); 2876 2877 RTE_LCORE_FOREACH_WORKER(lcore_id) { 2878 if (rte_eal_wait_lcore(lcore_id) < 0) 2879 return -1; 2880 } 2881 2882 if (app_mode == APP_MODE_PMD_MGMT) { 2883 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2884 if (rte_lcore_is_enabled(lcore_id) == 0) 2885 continue; 2886 qconf = &lcore_conf[lcore_id]; 2887 for (queue = 0; queue < qconf->n_rx_queue; ++queue) { 2888 portid = qconf->rx_queue_list[queue].port_id; 2889 queueid = qconf->rx_queue_list[queue].queue_id; 2890 2891 rte_power_ethdev_pmgmt_queue_disable(lcore_id, 2892 portid, queueid); 2893 } 2894 } 2895 } 2896 2897 RTE_ETH_FOREACH_DEV(portid) 2898 { 2899 if ((enabled_port_mask & (1 << portid)) == 0) 2900 continue; 2901 2902 ret = rte_eth_dev_stop(portid); 2903 if (ret != 0) 2904 RTE_LOG(ERR, L3FWD_POWER, "rte_eth_dev_stop: err=%d, port=%u\n", 2905 ret, portid); 2906 2907 rte_eth_dev_close(portid); 2908 } 2909 2910 if (app_mode == APP_MODE_EMPTY_POLL) 2911 rte_power_empty_poll_stat_free(); 2912 2913 if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) && 2914 deinit_power_library()) 2915 rte_exit(EXIT_FAILURE, "deinit_power_library failed\n"); 2916 2917 if (rte_eal_cleanup() < 0) 2918 RTE_LOG(ERR, L3FWD_POWER, "EAL cleanup failed\n"); 2919 2920 return 0; 2921 } 2922