1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdint.h> 8 #include <inttypes.h> 9 #include <sys/types.h> 10 #include <string.h> 11 #include <sys/queue.h> 12 #include <stdarg.h> 13 #include <errno.h> 14 #include <getopt.h> 15 #include <unistd.h> 16 #include <signal.h> 17 #include <math.h> 18 19 #include <rte_common.h> 20 #include <rte_byteorder.h> 21 #include <rte_log.h> 22 #include <rte_malloc.h> 23 #include <rte_memory.h> 24 #include <rte_memcpy.h> 25 #include <rte_eal.h> 26 #include <rte_launch.h> 27 #include <rte_atomic.h> 28 #include <rte_cycles.h> 29 #include <rte_prefetch.h> 30 #include <rte_lcore.h> 31 #include <rte_per_lcore.h> 32 #include <rte_branch_prediction.h> 33 #include <rte_interrupts.h> 34 #include <rte_random.h> 35 #include <rte_debug.h> 36 #include <rte_ether.h> 37 #include <rte_ethdev.h> 38 #include <rte_mempool.h> 39 #include <rte_mbuf.h> 40 #include <rte_ip.h> 41 #include <rte_tcp.h> 42 #include <rte_udp.h> 43 #include <rte_string_fns.h> 44 #include <rte_timer.h> 45 #include <rte_power.h> 46 #include <rte_spinlock.h> 47 #include <rte_power_empty_poll.h> 48 #include <rte_metrics.h> 49 #include <rte_telemetry.h> 50 51 #include "perf_core.h" 52 #include "main.h" 53 54 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 55 56 #define MAX_PKT_BURST 32 57 58 #define MIN_ZERO_POLL_COUNT 10 59 60 /* 100 ms interval */ 61 #define TIMER_NUMBER_PER_SECOND 10 62 /* (10ms) */ 63 #define INTERVALS_PER_SECOND 100 64 /* 100000 us */ 65 #define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND) 66 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25 67 68 #define APP_LOOKUP_EXACT_MATCH 0 69 #define APP_LOOKUP_LPM 1 70 #define DO_RFC_1812_CHECKS 71 72 #ifndef APP_LOOKUP_METHOD 73 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 74 #endif 75 76 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 77 #include <rte_hash.h> 78 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 79 #include <rte_lpm.h> 80 #else 81 #error "APP_LOOKUP_METHOD set to incorrect value" 82 #endif 83 84 #ifndef IPv6_BYTES 85 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ 86 "%02x%02x:%02x%02x:%02x%02x:%02x%02x" 87 #define IPv6_BYTES(addr) \ 88 addr[0], addr[1], addr[2], addr[3], \ 89 addr[4], addr[5], addr[6], addr[7], \ 90 addr[8], addr[9], addr[10], addr[11],\ 91 addr[12], addr[13],addr[14], addr[15] 92 #endif 93 94 #define MAX_JUMBO_PKT_LEN 9600 95 96 #define IPV6_ADDR_LEN 16 97 98 #define MEMPOOL_CACHE_SIZE 256 99 100 /* 101 * This expression is used to calculate the number of mbufs needed depending on 102 * user input, taking into account memory for rx and tx hardware rings, cache 103 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 104 * NB_MBUF never goes below a minimum value of 8192. 105 */ 106 107 #define NB_MBUF RTE_MAX ( \ 108 (nb_ports*nb_rx_queue*nb_rxd + \ 109 nb_ports*nb_lcores*MAX_PKT_BURST + \ 110 nb_ports*n_tx_queue*nb_txd + \ 111 nb_lcores*MEMPOOL_CACHE_SIZE), \ 112 (unsigned)8192) 113 114 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 115 116 #define NB_SOCKETS 8 117 118 /* Configure how many packets ahead to prefetch, when reading packets */ 119 #define PREFETCH_OFFSET 3 120 121 /* 122 * Configurable number of RX/TX ring descriptors 123 */ 124 #define RTE_TEST_RX_DESC_DEFAULT 1024 125 #define RTE_TEST_TX_DESC_DEFAULT 1024 126 127 /* 128 * These two thresholds were decided on by running the training algorithm on 129 * a 2.5GHz Xeon. 
These defaults can be overridden by supplying non-zero values 130 * for the med_threshold and high_threshold parameters on the command line. 131 */ 132 #define EMPTY_POLL_MED_THRESHOLD 350000UL 133 #define EMPTY_POLL_HGH_THRESHOLD 580000UL 134 135 #define NUM_TELSTATS RTE_DIM(telstats_strings) 136 137 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 138 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 139 140 /* ethernet addresses of ports */ 141 static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 142 143 /* ethernet addresses of ports */ 144 static rte_spinlock_t locks[RTE_MAX_ETHPORTS]; 145 146 /* mask of enabled ports */ 147 static uint32_t enabled_port_mask = 0; 148 /* Ports set in promiscuous mode off by default. */ 149 static int promiscuous_on = 0; 150 /* NUMA is enabled by default. */ 151 static int numa_on = 1; 152 static bool empty_poll_stop; 153 static bool empty_poll_train; 154 volatile bool quit_signal; 155 static struct ep_params *ep_params; 156 static struct ep_policy policy; 157 static long ep_med_edpi, ep_hgh_edpi; 158 /* timer to update telemetry every 500ms */ 159 static struct rte_timer telemetry_timer; 160 161 /* stats index returned by metrics lib */ 162 int telstats_index; 163 164 struct telstats_name { 165 char name[RTE_ETH_XSTATS_NAME_SIZE]; 166 }; 167 168 /* telemetry stats to be reported */ 169 const struct telstats_name telstats_strings[] = { 170 {"empty_poll"}, 171 {"full_poll"}, 172 {"busy_percent"} 173 }; 174 175 /* core busyness in percentage */ 176 enum busy_rate { 177 ZERO = 0, 178 PARTIAL = 50, 179 FULL = 100 180 }; 181 182 /* reference poll count to measure core busyness */ 183 #define DEFAULT_COUNT 10000 184 /* 185 * reference CYCLES to be used to 186 * measure core busyness based on poll count 187 */ 188 #define MIN_CYCLES 1500000ULL 189 #define MAX_CYCLES 22000000ULL 190 191 /* (500ms) */ 192 #define TELEMETRY_INTERVALS_PER_SEC 2 193 194 static int parse_ptype; /**< Parse packet type using rx callback, and */ 195 /**< disabled by default */ 196 197 enum appmode { 198 APP_MODE_LEGACY = 0, 199 APP_MODE_EMPTY_POLL, 200 APP_MODE_TELEMETRY 201 }; 202 203 enum appmode app_mode; 204 205 enum freq_scale_hint_t 206 { 207 FREQ_LOWER = -1, 208 FREQ_CURRENT = 0, 209 FREQ_HIGHER = 1, 210 FREQ_HIGHEST = 2 211 }; 212 213 struct lcore_rx_queue { 214 uint16_t port_id; 215 uint8_t queue_id; 216 enum freq_scale_hint_t freq_up_hint; 217 uint32_t zero_rx_packet_count; 218 uint32_t idle_hint; 219 } __rte_cache_aligned; 220 221 #define MAX_RX_QUEUE_PER_LCORE 16 222 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 223 #define MAX_RX_QUEUE_PER_PORT 128 224 225 #define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 226 227 228 struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; 229 static struct lcore_params lcore_params_array_default[] = { 230 {0, 0, 2}, 231 {0, 1, 2}, 232 {0, 2, 2}, 233 {1, 0, 2}, 234 {1, 1, 2}, 235 {1, 2, 2}, 236 {2, 0, 2}, 237 {3, 0, 3}, 238 {3, 1, 3}, 239 }; 240 241 struct lcore_params *lcore_params = lcore_params_array_default; 242 uint16_t nb_lcore_params = RTE_DIM(lcore_params_array_default); 243 244 static struct rte_eth_conf port_conf = { 245 .rxmode = { 246 .mq_mode = ETH_MQ_RX_RSS, 247 .max_rx_pkt_len = RTE_ETHER_MAX_LEN, 248 .split_hdr_size = 0, 249 .offloads = DEV_RX_OFFLOAD_CHECKSUM, 250 }, 251 .rx_adv_conf = { 252 .rss_conf = { 253 .rss_key = NULL, 254 .rss_hf = ETH_RSS_UDP, 255 }, 256 }, 257 .txmode = { 258 .mq_mode = ETH_MQ_TX_NONE, 259 } 260 }; 261 262 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; 263 264 265 #if 
(APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 266 267 #ifdef RTE_ARCH_X86 268 #include <rte_hash_crc.h> 269 #define DEFAULT_HASH_FUNC rte_hash_crc 270 #else 271 #include <rte_jhash.h> 272 #define DEFAULT_HASH_FUNC rte_jhash 273 #endif 274 275 struct ipv4_5tuple { 276 uint32_t ip_dst; 277 uint32_t ip_src; 278 uint16_t port_dst; 279 uint16_t port_src; 280 uint8_t proto; 281 } __rte_packed; 282 283 struct ipv6_5tuple { 284 uint8_t ip_dst[IPV6_ADDR_LEN]; 285 uint8_t ip_src[IPV6_ADDR_LEN]; 286 uint16_t port_dst; 287 uint16_t port_src; 288 uint8_t proto; 289 } __rte_packed; 290 291 struct ipv4_l3fwd_route { 292 struct ipv4_5tuple key; 293 uint8_t if_out; 294 }; 295 296 struct ipv6_l3fwd_route { 297 struct ipv6_5tuple key; 298 uint8_t if_out; 299 }; 300 301 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 302 {{RTE_IPV4(100,10,0,1), RTE_IPV4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, 303 {{RTE_IPV4(100,20,0,2), RTE_IPV4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, 304 {{RTE_IPV4(100,30,0,3), RTE_IPV4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, 305 {{RTE_IPV4(100,40,0,4), RTE_IPV4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, 306 }; 307 308 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 309 { 310 { 311 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 312 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, 313 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 314 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, 315 1, 10, IPPROTO_UDP 316 }, 4 317 }, 318 }; 319 320 typedef struct rte_hash lookup_struct_t; 321 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 322 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 323 324 #define L3FWD_HASH_ENTRIES 1024 325 326 static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 327 static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 328 #endif 329 330 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 331 struct ipv4_l3fwd_route { 332 uint32_t ip; 333 uint8_t depth; 334 uint8_t if_out; 335 }; 336 337 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 338 {RTE_IPV4(1,1,1,0), 24, 0}, 339 {RTE_IPV4(2,1,1,0), 24, 1}, 340 {RTE_IPV4(3,1,1,0), 24, 2}, 341 {RTE_IPV4(4,1,1,0), 24, 3}, 342 {RTE_IPV4(5,1,1,0), 24, 4}, 343 {RTE_IPV4(6,1,1,0), 24, 5}, 344 {RTE_IPV4(7,1,1,0), 24, 6}, 345 {RTE_IPV4(8,1,1,0), 24, 7}, 346 }; 347 348 #define IPV4_L3FWD_LPM_MAX_RULES 1024 349 350 typedef struct rte_lpm lookup_struct_t; 351 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 352 #endif 353 354 struct lcore_conf { 355 uint16_t n_rx_queue; 356 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 357 uint16_t n_tx_port; 358 uint16_t tx_port_id[RTE_MAX_ETHPORTS]; 359 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 360 struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; 361 lookup_struct_t * ipv4_lookup_struct; 362 lookup_struct_t * ipv6_lookup_struct; 363 } __rte_cache_aligned; 364 365 struct lcore_stats { 366 /* total sleep time in ms since last frequency scaling down */ 367 uint32_t sleep_time; 368 /* number of long sleep recently */ 369 uint32_t nb_long_sleep; 370 /* freq. scaling up trend */ 371 uint32_t trend; 372 /* total packet processed recently */ 373 uint64_t nb_rx_processed; 374 /* total iterations looped recently */ 375 uint64_t nb_iteration_looped; 376 /* 377 * Represents empty and non empty polls 378 * of rte_eth_rx_burst(); 379 * ep_nep[0] holds non empty polls 380 * i.e. 0 < nb_rx <= MAX_BURST 381 * ep_nep[1] holds empty polls. 382 * i.e. 
nb_rx == 0
	 */
	uint64_t ep_nep[2];
	/*
	 * Represents full and empty+partial
	 * polls of rte_eth_rx_burst();
	 * fp_nfp[0] holds empty+partial polls
	 * i.e. 0 <= nb_rx < MAX_BURST
	 * fp_nfp[1] holds full polls
	 * i.e. nb_rx == MAX_BURST
	 */
	uint64_t fp_nfp[2];
	enum busy_rate br;
	rte_spinlock_t telemetry_lock;
} __rte_cache_aligned;

static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned;
static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned;
static struct rte_timer power_timers[RTE_MAX_LCORE];

static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \
		unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);


/*
 * These defaults are using the max frequency index (1), a medium index (9)
 * and a typical low frequency index (14). These can be adjusted to use
 * different indexes using the relevant command line parameters.
 */
static uint8_t freq_tlb[] = {14, 9, 1};

static int is_done(void)
{
	return quit_signal;
}

/* exit signal handler */
static void
signal_exit_now(int sigtype)
{

	if (sigtype == SIGINT)
		quit_signal = true;

}

/* Frequency scale-down timer callback */
static void
power_timer_cb(__rte_unused struct rte_timer *tim,
			__rte_unused void *arg)
{
	uint64_t hz;
	float sleep_time_ratio;
	unsigned lcore_id = rte_lcore_id();

	/* fraction of the last scaling period this lcore spent sleeping */
	sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
				(float)SCALING_PERIOD;
	/**
	 * scale down the frequency by one step if the lcore slept for a
	 * large share of the period.
	 */
	if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
		if (rte_power_freq_down)
			rte_power_freq_down(lcore_id);
	}
	else if ((unsigned)(stats[lcore_id].nb_rx_processed /
		stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
		/**
		 * scale down one step if the average number of packets per
		 * iteration is below expectation.
		 */
		if (rte_power_freq_down)
			rte_power_freq_down(lcore_id);
	}

	/**
	 * re-arm the timer according to the current frequency to keep the
	 * timer interval relatively fixed.
	 */
	hz = rte_get_timer_hz();
	rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND,
				SINGLE, lcore_id, power_timer_cb, NULL);

	stats[lcore_id].nb_rx_processed = 0;
	stats[lcore_id].nb_iteration_looped = 0;

	stats[lcore_id].sleep_time = 0;
}

/* Enqueue a single packet, and send burst if queue is filled */
static inline int
send_single_packet(struct rte_mbuf *m, uint16_t port)
{
	uint32_t lcore_id;
	struct lcore_conf *qconf;

	lcore_id = rte_lcore_id();
	qconf = &lcore_conf[lcore_id];

	rte_eth_tx_buffer(port, qconf->tx_queue_id[port],
			qconf->tx_buffer[port], m);

	return 0;
}

#ifdef DO_RFC_1812_CHECKS
static inline int
is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len)
{
	/* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
	/*
	 * 1. The packet length reported by the Link Layer must be large
	 * enough to hold the minimum length legal IP datagram (20 bytes).
	 */
	if (link_len < sizeof(struct rte_ipv4_hdr))
		return -1;

	/* 2. The IP checksum must be correct.
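	 *    (left to the NIC here: port_conf requests DEV_RX_OFFLOAD_CHECKSUM,
	 *    so the checksum is not re-verified in software)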
*/ 501 /* this is checked in H/W */ 502 503 /* 504 * 3. The IP version number must be 4. If the version number is not 4 505 * then the packet may be another version of IP, such as IPng or 506 * ST-II. 507 */ 508 if (((pkt->version_ihl) >> 4) != 4) 509 return -3; 510 /* 511 * 4. The IP header length field must be large enough to hold the 512 * minimum length legal IP datagram (20 bytes = 5 words). 513 */ 514 if ((pkt->version_ihl & 0xf) < 5) 515 return -4; 516 517 /* 518 * 5. The IP total length field must be large enough to hold the IP 519 * datagram header, whose length is specified in the IP header length 520 * field. 521 */ 522 if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr)) 523 return -5; 524 525 return 0; 526 } 527 #endif 528 529 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 530 static void 531 print_ipv4_key(struct ipv4_5tuple key) 532 { 533 printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, " 534 "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src, 535 key.port_dst, key.port_src, key.proto); 536 } 537 static void 538 print_ipv6_key(struct ipv6_5tuple key) 539 { 540 printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", " 541 "port dst = %d, port src = %d, proto = %d\n", 542 IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src), 543 key.port_dst, key.port_src, key.proto); 544 } 545 546 static inline uint16_t 547 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 548 lookup_struct_t * ipv4_l3fwd_lookup_struct) 549 { 550 struct ipv4_5tuple key; 551 struct rte_tcp_hdr *tcp; 552 struct rte_udp_hdr *udp; 553 int ret = 0; 554 555 key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); 556 key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); 557 key.proto = ipv4_hdr->next_proto_id; 558 559 switch (ipv4_hdr->next_proto_id) { 560 case IPPROTO_TCP: 561 tcp = (struct rte_tcp_hdr *)((unsigned char *)ipv4_hdr + 562 sizeof(struct rte_ipv4_hdr)); 563 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 564 key.port_src = rte_be_to_cpu_16(tcp->src_port); 565 break; 566 567 case IPPROTO_UDP: 568 udp = (struct rte_udp_hdr *)((unsigned char *)ipv4_hdr + 569 sizeof(struct rte_ipv4_hdr)); 570 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 571 key.port_src = rte_be_to_cpu_16(udp->src_port); 572 break; 573 574 default: 575 key.port_dst = 0; 576 key.port_src = 0; 577 break; 578 } 579 580 /* Find destination port */ 581 ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); 582 return ((ret < 0) ? 
portid : ipv4_l3fwd_out_if[ret]); 583 } 584 585 static inline uint16_t 586 get_ipv6_dst_port(struct rte_ipv6_hdr *ipv6_hdr, uint16_t portid, 587 lookup_struct_t *ipv6_l3fwd_lookup_struct) 588 { 589 struct ipv6_5tuple key; 590 struct rte_tcp_hdr *tcp; 591 struct rte_udp_hdr *udp; 592 int ret = 0; 593 594 memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); 595 memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); 596 597 key.proto = ipv6_hdr->proto; 598 599 switch (ipv6_hdr->proto) { 600 case IPPROTO_TCP: 601 tcp = (struct rte_tcp_hdr *)((unsigned char *) ipv6_hdr + 602 sizeof(struct rte_ipv6_hdr)); 603 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 604 key.port_src = rte_be_to_cpu_16(tcp->src_port); 605 break; 606 607 case IPPROTO_UDP: 608 udp = (struct rte_udp_hdr *)((unsigned char *) ipv6_hdr + 609 sizeof(struct rte_ipv6_hdr)); 610 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 611 key.port_src = rte_be_to_cpu_16(udp->src_port); 612 break; 613 614 default: 615 key.port_dst = 0; 616 key.port_src = 0; 617 break; 618 } 619 620 /* Find destination port */ 621 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 622 return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); 623 } 624 #endif 625 626 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 627 static inline uint16_t 628 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 629 lookup_struct_t *ipv4_l3fwd_lookup_struct) 630 { 631 uint32_t next_hop; 632 633 return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, 634 rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? 635 next_hop : portid); 636 } 637 #endif 638 639 static inline void 640 parse_ptype_one(struct rte_mbuf *m) 641 { 642 struct rte_ether_hdr *eth_hdr; 643 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 644 uint16_t ether_type; 645 646 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 647 ether_type = eth_hdr->ether_type; 648 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) 649 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 650 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) 651 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 652 653 m->packet_type = packet_type; 654 } 655 656 static uint16_t 657 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused, 658 struct rte_mbuf *pkts[], uint16_t nb_pkts, 659 uint16_t max_pkts __rte_unused, 660 void *user_param __rte_unused) 661 { 662 unsigned int i; 663 664 for (i = 0; i < nb_pkts; ++i) 665 parse_ptype_one(pkts[i]); 666 667 return nb_pkts; 668 } 669 670 static int 671 add_cb_parse_ptype(uint16_t portid, uint16_t queueid) 672 { 673 printf("Port %d: softly parse packet type info\n", portid); 674 if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL)) 675 return 0; 676 677 printf("Failed to add rx callback: port=%d\n", portid); 678 return -1; 679 } 680 681 static inline void 682 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid, 683 struct lcore_conf *qconf) 684 { 685 struct rte_ether_hdr *eth_hdr; 686 struct rte_ipv4_hdr *ipv4_hdr; 687 void *d_addr_bytes; 688 uint16_t dst_port; 689 690 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 691 692 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 693 /* Handle IPv4 headers.*/ 694 ipv4_hdr = 695 rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 696 sizeof(struct rte_ether_hdr)); 697 698 #ifdef DO_RFC_1812_CHECKS 699 /* Check to make sure the packet is valid (RFC1812) */ 700 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { 701 rte_pktmbuf_free(m); 702 return; 703 } 704 #endif 705 706 dst_port = get_ipv4_dst_port(ipv4_hdr, 
portid,
					qconf->ipv4_lookup_struct);
		if (dst_port >= RTE_MAX_ETHPORTS ||
				(enabled_port_mask & 1 << dst_port) == 0)
			dst_port = portid;

		/* 02:00:00:00:00:xx */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) =
			0x000000000002 + ((uint64_t)dst_port << 40);

#ifdef DO_RFC_1812_CHECKS
		/* Update time to live and header checksum */
		--(ipv4_hdr->time_to_live);
		++(ipv4_hdr->hdr_checksum);
#endif

		/* src addr */
		rte_ether_addr_copy(&ports_eth_addr[dst_port],
				&eth_hdr->s_addr);

		send_single_packet(m, dst_port);
	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
		/* Handle IPv6 headers.*/
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		struct rte_ipv6_hdr *ipv6_hdr;

		ipv6_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
						sizeof(struct rte_ether_hdr));

		dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
					qconf->ipv6_lookup_struct);

		if (dst_port >= RTE_MAX_ETHPORTS ||
				(enabled_port_mask & 1 << dst_port) == 0)
			dst_port = portid;

		/* 02:00:00:00:00:xx */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) =
			0x000000000002 + ((uint64_t)dst_port << 40);

		/* src addr */
		rte_ether_addr_copy(&ports_eth_addr[dst_port],
				&eth_hdr->s_addr);

		send_single_packet(m, dst_port);
#else
		/* We don't currently handle IPv6 packets in LPM mode. */
		rte_pktmbuf_free(m);
#endif
	} else
		rte_pktmbuf_free(m);

}

#define MINIMUM_SLEEP_TIME         1
#define SUSPEND_THRESHOLD          300

static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
	/* Sleep for just 1 us while the queue has polled empty fewer than
	 * SUSPEND_THRESHOLD consecutive times
	 */
	if (zero_rx_packet_count < SUSPEND_THRESHOLD)
		return MINIMUM_SLEEP_TIME;
	/* Otherwise request a long sleep (SUSPEND_THRESHOLD us); the caller
	 * treats this as a hint to suspend until an Rx interrupt fires
	 */
	else
		return SUSPEND_THRESHOLD;
}

static inline enum freq_scale_hint_t
power_freq_scaleup_heuristic(unsigned lcore_id,
			     uint16_t port_id,
			     uint16_t queue_id)
{
	uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id);
/**
 * HW Rx queue size is 128 by default, Rx burst read at maximum 32 entries
 * per iteration
 */
#define FREQ_GEAR1_RX_PACKET_THRESHOLD	MAX_PKT_BURST
#define FREQ_GEAR2_RX_PACKET_THRESHOLD	(MAX_PKT_BURST*2)
#define FREQ_GEAR3_RX_PACKET_THRESHOLD	(MAX_PKT_BURST*3)
#define FREQ_UP_TREND1_ACC	1
#define FREQ_UP_TREND2_ACC	100
#define FREQ_UP_THRESHOLD	10000

	if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHEST;
	} else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD))
		stats[lcore_id].trend += FREQ_UP_TREND2_ACC;
	else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD))
		stats[lcore_id].trend += FREQ_UP_TREND1_ACC;

	if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHER;
	}

	return FREQ_CURRENT;
}

/**
 * Force the polling thread to sleep until a one-shot Rx interrupt triggers.
 * @param num
 *   Number of Rx queues whose interrupts to wait on.
 * @return
 *   0 on success
 */
static int
sleep_until_rx_interrupt(int num)
{
	/*
	 * we want to track when we are woken up by traffic so that we can go
	 * back to sleep again without log spamming.
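	 *
	 * Note: the epoll event data used below was registered by
	 * event_register() as ((port_id << CHAR_BIT) | queue_id), which is
	 * how the wake-up log recovers the port and queue numbers.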
827 */ 828 static bool timeout; 829 struct rte_epoll_event event[num]; 830 int n, i; 831 uint16_t port_id; 832 uint8_t queue_id; 833 void *data; 834 835 if (!timeout) { 836 RTE_LOG(INFO, L3FWD_POWER, 837 "lcore %u sleeps until interrupt triggers\n", 838 rte_lcore_id()); 839 } 840 841 n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, 10); 842 for (i = 0; i < n; i++) { 843 data = event[i].epdata.data; 844 port_id = ((uintptr_t)data) >> CHAR_BIT; 845 queue_id = ((uintptr_t)data) & 846 RTE_LEN2MASK(CHAR_BIT, uint8_t); 847 RTE_LOG(INFO, L3FWD_POWER, 848 "lcore %u is waked up from rx interrupt on" 849 " port %d queue %d\n", 850 rte_lcore_id(), port_id, queue_id); 851 } 852 timeout = n == 0; 853 854 return 0; 855 } 856 857 static void turn_on_off_intr(struct lcore_conf *qconf, bool on) 858 { 859 int i; 860 struct lcore_rx_queue *rx_queue; 861 uint8_t queue_id; 862 uint16_t port_id; 863 864 for (i = 0; i < qconf->n_rx_queue; ++i) { 865 rx_queue = &(qconf->rx_queue_list[i]); 866 port_id = rx_queue->port_id; 867 queue_id = rx_queue->queue_id; 868 869 rte_spinlock_lock(&(locks[port_id])); 870 if (on) 871 rte_eth_dev_rx_intr_enable(port_id, queue_id); 872 else 873 rte_eth_dev_rx_intr_disable(port_id, queue_id); 874 rte_spinlock_unlock(&(locks[port_id])); 875 } 876 } 877 878 static int event_register(struct lcore_conf *qconf) 879 { 880 struct lcore_rx_queue *rx_queue; 881 uint8_t queueid; 882 uint16_t portid; 883 uint32_t data; 884 int ret; 885 int i; 886 887 for (i = 0; i < qconf->n_rx_queue; ++i) { 888 rx_queue = &(qconf->rx_queue_list[i]); 889 portid = rx_queue->port_id; 890 queueid = rx_queue->queue_id; 891 data = portid << CHAR_BIT | queueid; 892 893 ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid, 894 RTE_EPOLL_PER_THREAD, 895 RTE_INTR_EVENT_ADD, 896 (void *)((uintptr_t)data)); 897 if (ret) 898 return ret; 899 } 900 901 return 0; 902 } 903 /* main processing loop */ 904 static int 905 main_telemetry_loop(__rte_unused void *dummy) 906 { 907 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 908 unsigned int lcore_id; 909 uint64_t prev_tsc, diff_tsc, cur_tsc, prev_tel_tsc; 910 int i, j, nb_rx; 911 uint8_t queueid; 912 uint16_t portid; 913 struct lcore_conf *qconf; 914 struct lcore_rx_queue *rx_queue; 915 uint64_t ep_nep[2] = {0}, fp_nfp[2] = {0}; 916 uint64_t poll_count; 917 enum busy_rate br; 918 919 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 920 US_PER_S * BURST_TX_DRAIN_US; 921 922 poll_count = 0; 923 prev_tsc = 0; 924 prev_tel_tsc = 0; 925 926 lcore_id = rte_lcore_id(); 927 qconf = &lcore_conf[lcore_id]; 928 929 if (qconf->n_rx_queue == 0) { 930 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 931 lcore_id); 932 return 0; 933 } 934 935 RTE_LOG(INFO, L3FWD_POWER, "entering main telemetry loop on lcore %u\n", 936 lcore_id); 937 938 for (i = 0; i < qconf->n_rx_queue; i++) { 939 portid = qconf->rx_queue_list[i].port_id; 940 queueid = qconf->rx_queue_list[i].queue_id; 941 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 942 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 943 } 944 945 while (!is_done()) { 946 947 cur_tsc = rte_rdtsc(); 948 /* 949 * TX burst queue drain 950 */ 951 diff_tsc = cur_tsc - prev_tsc; 952 if (unlikely(diff_tsc > drain_tsc)) { 953 for (i = 0; i < qconf->n_tx_port; ++i) { 954 portid = qconf->tx_port_id[i]; 955 rte_eth_tx_buffer_flush(portid, 956 qconf->tx_queue_id[portid], 957 qconf->tx_buffer[portid]); 958 } 959 prev_tsc = cur_tsc; 960 } 961 962 /* 963 * Read packet from RX queues 964 */ 965 for (i = 0; i < qconf->n_rx_queue; ++i) { 966 
rx_queue = &(qconf->rx_queue_list[i]); 967 portid = rx_queue->port_id; 968 queueid = rx_queue->queue_id; 969 970 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 971 MAX_PKT_BURST); 972 ep_nep[nb_rx == 0]++; 973 fp_nfp[nb_rx == MAX_PKT_BURST]++; 974 poll_count++; 975 if (unlikely(nb_rx == 0)) 976 continue; 977 978 /* Prefetch first packets */ 979 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 980 rte_prefetch0(rte_pktmbuf_mtod( 981 pkts_burst[j], void *)); 982 } 983 984 /* Prefetch and forward already prefetched packets */ 985 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 986 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 987 j + PREFETCH_OFFSET], void *)); 988 l3fwd_simple_forward(pkts_burst[j], portid, 989 qconf); 990 } 991 992 /* Forward remaining prefetched packets */ 993 for (; j < nb_rx; j++) { 994 l3fwd_simple_forward(pkts_burst[j], portid, 995 qconf); 996 } 997 } 998 if (unlikely(poll_count >= DEFAULT_COUNT)) { 999 diff_tsc = cur_tsc - prev_tel_tsc; 1000 if (diff_tsc >= MAX_CYCLES) { 1001 br = FULL; 1002 } else if (diff_tsc > MIN_CYCLES && 1003 diff_tsc < MAX_CYCLES) { 1004 br = (diff_tsc * 100) / MAX_CYCLES; 1005 } else { 1006 br = ZERO; 1007 } 1008 poll_count = 0; 1009 prev_tel_tsc = cur_tsc; 1010 /* update stats for telemetry */ 1011 rte_spinlock_lock(&stats[lcore_id].telemetry_lock); 1012 stats[lcore_id].ep_nep[0] = ep_nep[0]; 1013 stats[lcore_id].ep_nep[1] = ep_nep[1]; 1014 stats[lcore_id].fp_nfp[0] = fp_nfp[0]; 1015 stats[lcore_id].fp_nfp[1] = fp_nfp[1]; 1016 stats[lcore_id].br = br; 1017 rte_spinlock_unlock(&stats[lcore_id].telemetry_lock); 1018 } 1019 } 1020 1021 return 0; 1022 } 1023 /* main processing loop */ 1024 static int 1025 main_empty_poll_loop(__rte_unused void *dummy) 1026 { 1027 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1028 unsigned int lcore_id; 1029 uint64_t prev_tsc, diff_tsc, cur_tsc; 1030 int i, j, nb_rx; 1031 uint8_t queueid; 1032 uint16_t portid; 1033 struct lcore_conf *qconf; 1034 struct lcore_rx_queue *rx_queue; 1035 1036 const uint64_t drain_tsc = 1037 (rte_get_tsc_hz() + US_PER_S - 1) / 1038 US_PER_S * BURST_TX_DRAIN_US; 1039 1040 prev_tsc = 0; 1041 1042 lcore_id = rte_lcore_id(); 1043 qconf = &lcore_conf[lcore_id]; 1044 1045 if (qconf->n_rx_queue == 0) { 1046 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1047 lcore_id); 1048 return 0; 1049 } 1050 1051 for (i = 0; i < qconf->n_rx_queue; i++) { 1052 portid = qconf->rx_queue_list[i].port_id; 1053 queueid = qconf->rx_queue_list[i].queue_id; 1054 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1055 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1056 } 1057 1058 while (!is_done()) { 1059 stats[lcore_id].nb_iteration_looped++; 1060 1061 cur_tsc = rte_rdtsc(); 1062 /* 1063 * TX burst queue drain 1064 */ 1065 diff_tsc = cur_tsc - prev_tsc; 1066 if (unlikely(diff_tsc > drain_tsc)) { 1067 for (i = 0; i < qconf->n_tx_port; ++i) { 1068 portid = qconf->tx_port_id[i]; 1069 rte_eth_tx_buffer_flush(portid, 1070 qconf->tx_queue_id[portid], 1071 qconf->tx_buffer[portid]); 1072 } 1073 prev_tsc = cur_tsc; 1074 } 1075 1076 /* 1077 * Read packet from RX queues 1078 */ 1079 for (i = 0; i < qconf->n_rx_queue; ++i) { 1080 rx_queue = &(qconf->rx_queue_list[i]); 1081 rx_queue->idle_hint = 0; 1082 portid = rx_queue->port_id; 1083 queueid = rx_queue->queue_id; 1084 1085 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1086 MAX_PKT_BURST); 1087 1088 stats[lcore_id].nb_rx_processed += nb_rx; 1089 1090 if (nb_rx == 0) { 1091 1092 rte_power_empty_poll_stat_update(lcore_id); 1093 1094 continue; 
1095 } else { 1096 rte_power_poll_stat_update(lcore_id, nb_rx); 1097 } 1098 1099 1100 /* Prefetch first packets */ 1101 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1102 rte_prefetch0(rte_pktmbuf_mtod( 1103 pkts_burst[j], void *)); 1104 } 1105 1106 /* Prefetch and forward already prefetched packets */ 1107 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1108 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1109 j + PREFETCH_OFFSET], 1110 void *)); 1111 l3fwd_simple_forward(pkts_burst[j], portid, 1112 qconf); 1113 } 1114 1115 /* Forward remaining prefetched packets */ 1116 for (; j < nb_rx; j++) { 1117 l3fwd_simple_forward(pkts_burst[j], portid, 1118 qconf); 1119 } 1120 1121 } 1122 1123 } 1124 1125 return 0; 1126 } 1127 /* main processing loop */ 1128 static int 1129 main_legacy_loop(__rte_unused void *dummy) 1130 { 1131 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1132 unsigned lcore_id; 1133 uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz; 1134 uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power; 1135 int i, j, nb_rx; 1136 uint8_t queueid; 1137 uint16_t portid; 1138 struct lcore_conf *qconf; 1139 struct lcore_rx_queue *rx_queue; 1140 enum freq_scale_hint_t lcore_scaleup_hint; 1141 uint32_t lcore_rx_idle_count = 0; 1142 uint32_t lcore_idle_hint = 0; 1143 int intr_en = 0; 1144 1145 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; 1146 1147 prev_tsc = 0; 1148 hz = rte_get_timer_hz(); 1149 tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND; 1150 1151 lcore_id = rte_lcore_id(); 1152 qconf = &lcore_conf[lcore_id]; 1153 1154 if (qconf->n_rx_queue == 0) { 1155 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); 1156 return 0; 1157 } 1158 1159 RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id); 1160 1161 for (i = 0; i < qconf->n_rx_queue; i++) { 1162 portid = qconf->rx_queue_list[i].port_id; 1163 queueid = qconf->rx_queue_list[i].queue_id; 1164 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1165 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1166 } 1167 1168 /* add into event wait list */ 1169 if (event_register(qconf) == 0) 1170 intr_en = 1; 1171 else 1172 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 1173 1174 while (!is_done()) { 1175 stats[lcore_id].nb_iteration_looped++; 1176 1177 cur_tsc = rte_rdtsc(); 1178 cur_tsc_power = cur_tsc; 1179 1180 /* 1181 * TX burst queue drain 1182 */ 1183 diff_tsc = cur_tsc - prev_tsc; 1184 if (unlikely(diff_tsc > drain_tsc)) { 1185 for (i = 0; i < qconf->n_tx_port; ++i) { 1186 portid = qconf->tx_port_id[i]; 1187 rte_eth_tx_buffer_flush(portid, 1188 qconf->tx_queue_id[portid], 1189 qconf->tx_buffer[portid]); 1190 } 1191 prev_tsc = cur_tsc; 1192 } 1193 1194 diff_tsc_power = cur_tsc_power - prev_tsc_power; 1195 if (diff_tsc_power > tim_res_tsc) { 1196 rte_timer_manage(); 1197 prev_tsc_power = cur_tsc_power; 1198 } 1199 1200 start_rx: 1201 /* 1202 * Read packet from RX queues 1203 */ 1204 lcore_scaleup_hint = FREQ_CURRENT; 1205 lcore_rx_idle_count = 0; 1206 for (i = 0; i < qconf->n_rx_queue; ++i) { 1207 rx_queue = &(qconf->rx_queue_list[i]); 1208 rx_queue->idle_hint = 0; 1209 portid = rx_queue->port_id; 1210 queueid = rx_queue->queue_id; 1211 1212 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1213 MAX_PKT_BURST); 1214 1215 stats[lcore_id].nb_rx_processed += nb_rx; 1216 if (unlikely(nb_rx == 0)) { 1217 /** 1218 * no packet received from rx queue, try to 1219 * sleep for a while forcing CPU enter deeper 1220 * C states. 
1221 */ 1222 rx_queue->zero_rx_packet_count++; 1223 1224 if (rx_queue->zero_rx_packet_count <= 1225 MIN_ZERO_POLL_COUNT) 1226 continue; 1227 1228 rx_queue->idle_hint = power_idle_heuristic(\ 1229 rx_queue->zero_rx_packet_count); 1230 lcore_rx_idle_count++; 1231 } else { 1232 rx_queue->zero_rx_packet_count = 0; 1233 1234 /** 1235 * do not scale up frequency immediately as 1236 * user to kernel space communication is costly 1237 * which might impact packet I/O for received 1238 * packets. 1239 */ 1240 rx_queue->freq_up_hint = 1241 power_freq_scaleup_heuristic(lcore_id, 1242 portid, queueid); 1243 } 1244 1245 /* Prefetch first packets */ 1246 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1247 rte_prefetch0(rte_pktmbuf_mtod( 1248 pkts_burst[j], void *)); 1249 } 1250 1251 /* Prefetch and forward already prefetched packets */ 1252 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1253 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1254 j + PREFETCH_OFFSET], void *)); 1255 l3fwd_simple_forward(pkts_burst[j], portid, 1256 qconf); 1257 } 1258 1259 /* Forward remaining prefetched packets */ 1260 for (; j < nb_rx; j++) { 1261 l3fwd_simple_forward(pkts_burst[j], portid, 1262 qconf); 1263 } 1264 } 1265 1266 if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) { 1267 for (i = 1, lcore_scaleup_hint = 1268 qconf->rx_queue_list[0].freq_up_hint; 1269 i < qconf->n_rx_queue; ++i) { 1270 rx_queue = &(qconf->rx_queue_list[i]); 1271 if (rx_queue->freq_up_hint > 1272 lcore_scaleup_hint) 1273 lcore_scaleup_hint = 1274 rx_queue->freq_up_hint; 1275 } 1276 1277 if (lcore_scaleup_hint == FREQ_HIGHEST) { 1278 if (rte_power_freq_max) 1279 rte_power_freq_max(lcore_id); 1280 } else if (lcore_scaleup_hint == FREQ_HIGHER) { 1281 if (rte_power_freq_up) 1282 rte_power_freq_up(lcore_id); 1283 } 1284 } else { 1285 /** 1286 * All Rx queues empty in recent consecutive polls, 1287 * sleep in a conservative manner, meaning sleep as 1288 * less as possible. 1289 */ 1290 for (i = 1, lcore_idle_hint = 1291 qconf->rx_queue_list[0].idle_hint; 1292 i < qconf->n_rx_queue; ++i) { 1293 rx_queue = &(qconf->rx_queue_list[i]); 1294 if (rx_queue->idle_hint < lcore_idle_hint) 1295 lcore_idle_hint = rx_queue->idle_hint; 1296 } 1297 1298 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1299 /** 1300 * execute "pause" instruction to avoid context 1301 * switch which generally take hundred of 1302 * microseconds for short sleep. 
				 */
				rte_delay_us(lcore_idle_hint);
			else {
				/* suspend until rx interrupt triggers */
				if (intr_en) {
					turn_on_off_intr(qconf, 1);
					sleep_until_rx_interrupt(
						qconf->n_rx_queue);
					turn_on_off_intr(qconf, 0);
					/**
					 * start receiving packets immediately
					 */
					if (likely(!is_done()))
						goto start_rx;
				}
			}
			stats[lcore_id].sleep_time += lcore_idle_hint;
		}
	}

	return 0;
}

static int
check_lcore_params(void)
{
	uint8_t queue, lcore;
	uint16_t i;
	int socketid;

	for (i = 0; i < nb_lcore_params; ++i) {
		queue = lcore_params[i].queue_id;
		if (queue >= MAX_RX_QUEUE_PER_PORT) {
			printf("invalid queue number: %hhu\n", queue);
			return -1;
		}
		lcore = lcore_params[i].lcore_id;
		if (!rte_lcore_is_enabled(lcore)) {
			printf("error: lcore %hhu is not enabled in lcore "
							"mask\n", lcore);
			return -1;
		}
		if (((socketid = rte_lcore_to_socket_id(lcore)) != 0) &&
							(numa_on == 0)) {
			printf("warning: lcore %hhu is on socket %d with numa "
						"off\n", lcore, socketid);
		}
		if (app_mode == APP_MODE_TELEMETRY && lcore == rte_lcore_id()) {
			printf("cannot enable master core %d in config for telemetry mode\n",
				rte_lcore_id());
			return -1;
		}
	}
	return 0;
}

static int
check_port_config(void)
{
	unsigned portid;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		portid = lcore_params[i].port_id;
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("port %u is not enabled in port mask\n",
								portid);
			return -1;
		}
		if (!rte_eth_dev_is_valid_port(portid)) {
			printf("port %u is not present on the board\n",
								portid);
			return -1;
		}
	}
	return 0;
}

static uint8_t
get_port_n_rx_queues(const uint16_t port)
{
	int queue = -1;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		if (lcore_params[i].port_id == port &&
				lcore_params[i].queue_id > queue)
			queue = lcore_params[i].queue_id;
	}
	return (uint8_t)(++queue);
}

static int
init_lcore_rx_queues(void)
{
	uint16_t i, nb_rx_queue;
	uint8_t lcore;

	for (i = 0; i < nb_lcore_params; ++i) {
		lcore = lcore_params[i].lcore_id;
		nb_rx_queue = lcore_conf[lcore].n_rx_queue;
		if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
			printf("error: too many queues (%u) for lcore: %u\n",
				(unsigned)nb_rx_queue + 1, (unsigned)lcore);
			return -1;
		} else {
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
				lcore_params[i].port_id;
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
				lcore_params[i].queue_id;
			lcore_conf[lcore].n_rx_queue++;
		}
	}
	return 0;
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf ("%s [EAL options] -- -p PORTMASK -P"
		" [--config (port,queue,lcore)[,(port,queue,lcore)]]"
		" [--high-perf-cores CORELIST]"
		" [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index)]]"
		" [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
		" -p PORTMASK: hexadecimal bitmask of ports to configure\n"
		" -P: enable promiscuous mode\n"
		" --config (port,queue,lcore): rx queues configuration\n"
		" --high-perf-cores CORELIST: list of high performance cores\n"
		" --perf-config: similar to --config, but cores are specified as indices"
		" into bins of high or regular performance cores\n"
		" --no-numa: optional, disable numa awareness\n"
		" --enable-jumbo: enable jumbo frames;"
		" the maximum packet length PKTLEN is given in decimal (64-9600)\n"
		" --parse-ptype: parse packet type by software\n"
		" --empty-poll: enable empty poll detection,"
		" followed by (training_flag, med_threshold, high_threshold)\n"
		" --telemetry: enable telemetry mode, reporting"
		" empty polls, full polls, and core busyness via telemetry\n",
		prgname);
}

static int parse_max_pkt_len(const char *pktlen)
{
	char *end = NULL;
	unsigned long len;

	/* parse decimal string */
	len = strtoul(pktlen, &end, 10);
	if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (len == 0)
		return -1;

	return len;
}

static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}

static int
parse_config(const char *q_arg)
{
	char s[256];
	const char *p, *p0 = q_arg;
	char *end;
	enum fieldnames {
		FLD_PORT = 0,
		FLD_QUEUE,
		FLD_LCORE,
		_NUM_FLD
	};
	unsigned long int_fld[_NUM_FLD];
	char *str_fld[_NUM_FLD];
	int i;
	unsigned size;

	nb_lcore_params = 0;

	while ((p = strchr(p0,'(')) != NULL) {
		++p;
		if((p0 = strchr(p,')')) == NULL)
			return -1;

		size = p0 - p;
		if(size >= sizeof(s))
			return -1;

		snprintf(s, sizeof(s), "%.*s", size, p);
		if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
								_NUM_FLD)
			return -1;
		for (i = 0; i < _NUM_FLD; i++){
			errno = 0;
			int_fld[i] = strtoul(str_fld[i], &end, 0);
			if (errno != 0 || end == str_fld[i] || int_fld[i] >
									255)
				return -1;
		}
		if (nb_lcore_params >= MAX_LCORE_PARAMS) {
			printf("exceeded max number of lcore params: %hu\n",
				nb_lcore_params);
			return -1;
		}
		lcore_params_array[nb_lcore_params].port_id =
				(uint8_t)int_fld[FLD_PORT];
		lcore_params_array[nb_lcore_params].queue_id =
				(uint8_t)int_fld[FLD_QUEUE];
		lcore_params_array[nb_lcore_params].lcore_id =
				(uint8_t)int_fld[FLD_LCORE];
		++nb_lcore_params;
	}
	lcore_params = lcore_params_array;

	return 0;
}
static int
parse_ep_config(const char *q_arg)
{
	char s[256];
	const char *p = q_arg;
	char *end;
	int num_arg;

	char *str_fld[3];

	int training_flag;
	int med_edpi;
	int hgh_edpi;

	ep_med_edpi = EMPTY_POLL_MED_THRESHOLD;
	ep_hgh_edpi = EMPTY_POLL_HGH_THRESHOLD;

	strlcpy(s, p, sizeof(s));

	num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ',');

	empty_poll_train = false;

	if (num_arg == 0)
		return 0;

	if (num_arg == 3) {

		training_flag = strtoul(str_fld[0], &end, 0);
		med_edpi = strtoul(str_fld[1], &end, 0);
		hgh_edpi = strtoul(str_fld[2], &end, 0);

		if (training_flag == 1)
			empty_poll_train = true;

		if (med_edpi > 0)
			ep_med_edpi = med_edpi;

		if (hgh_edpi > 0)
			ep_hgh_edpi = hgh_edpi;

	} else {

		return -1;

	}
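	/*
	 * Example (illustrative): "--empty-poll=1,350000,580000" enables the
	 * training phase and overrides the medium and high empty-poll
	 * thresholds parsed above.
	 */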
return 0; 1581 1582 } 1583 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" 1584 #define CMD_LINE_OPT_EMPTY_POLL "empty-poll" 1585 #define CMD_LINE_OPT_TELEMETRY "telemetry" 1586 1587 /* Parse the argument given in the command line of the application */ 1588 static int 1589 parse_args(int argc, char **argv) 1590 { 1591 int opt, ret; 1592 char **argvopt; 1593 int option_index; 1594 uint32_t limit; 1595 char *prgname = argv[0]; 1596 static struct option lgopts[] = { 1597 {"config", 1, 0, 0}, 1598 {"perf-config", 1, 0, 0}, 1599 {"high-perf-cores", 1, 0, 0}, 1600 {"no-numa", 0, 0, 0}, 1601 {"enable-jumbo", 0, 0, 0}, 1602 {CMD_LINE_OPT_EMPTY_POLL, 1, 0, 0}, 1603 {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, 1604 {CMD_LINE_OPT_TELEMETRY, 0, 0, 0}, 1605 {NULL, 0, 0, 0} 1606 }; 1607 1608 argvopt = argv; 1609 1610 while ((opt = getopt_long(argc, argvopt, "p:l:m:h:P", 1611 lgopts, &option_index)) != EOF) { 1612 1613 switch (opt) { 1614 /* portmask */ 1615 case 'p': 1616 enabled_port_mask = parse_portmask(optarg); 1617 if (enabled_port_mask == 0) { 1618 printf("invalid portmask\n"); 1619 print_usage(prgname); 1620 return -1; 1621 } 1622 break; 1623 case 'P': 1624 printf("Promiscuous mode selected\n"); 1625 promiscuous_on = 1; 1626 break; 1627 case 'l': 1628 limit = parse_max_pkt_len(optarg); 1629 freq_tlb[LOW] = limit; 1630 break; 1631 case 'm': 1632 limit = parse_max_pkt_len(optarg); 1633 freq_tlb[MED] = limit; 1634 break; 1635 case 'h': 1636 limit = parse_max_pkt_len(optarg); 1637 freq_tlb[HGH] = limit; 1638 break; 1639 /* long options */ 1640 case 0: 1641 if (!strncmp(lgopts[option_index].name, "config", 6)) { 1642 ret = parse_config(optarg); 1643 if (ret) { 1644 printf("invalid config\n"); 1645 print_usage(prgname); 1646 return -1; 1647 } 1648 } 1649 1650 if (!strncmp(lgopts[option_index].name, 1651 "perf-config", 11)) { 1652 ret = parse_perf_config(optarg); 1653 if (ret) { 1654 printf("invalid perf-config\n"); 1655 print_usage(prgname); 1656 return -1; 1657 } 1658 } 1659 1660 if (!strncmp(lgopts[option_index].name, 1661 "high-perf-cores", 15)) { 1662 ret = parse_perf_core_list(optarg); 1663 if (ret) { 1664 printf("invalid high-perf-cores\n"); 1665 print_usage(prgname); 1666 return -1; 1667 } 1668 } 1669 1670 if (!strncmp(lgopts[option_index].name, 1671 "no-numa", 7)) { 1672 printf("numa is disabled \n"); 1673 numa_on = 0; 1674 } 1675 1676 if (!strncmp(lgopts[option_index].name, 1677 CMD_LINE_OPT_EMPTY_POLL, 10)) { 1678 if (app_mode == APP_MODE_TELEMETRY) { 1679 printf(" empty-poll cannot be enabled as telemetry mode is enabled\n"); 1680 return -1; 1681 } 1682 app_mode = APP_MODE_EMPTY_POLL; 1683 ret = parse_ep_config(optarg); 1684 1685 if (ret) { 1686 printf("invalid empty poll config\n"); 1687 print_usage(prgname); 1688 return -1; 1689 } 1690 printf("empty-poll is enabled\n"); 1691 } 1692 1693 if (!strncmp(lgopts[option_index].name, 1694 CMD_LINE_OPT_TELEMETRY, 1695 sizeof(CMD_LINE_OPT_TELEMETRY))) { 1696 if (app_mode == APP_MODE_EMPTY_POLL) { 1697 printf("telemetry mode cannot be enabled as empty poll mode is enabled\n"); 1698 return -1; 1699 } 1700 app_mode = APP_MODE_TELEMETRY; 1701 printf("telemetry mode is enabled\n"); 1702 } 1703 1704 if (!strncmp(lgopts[option_index].name, 1705 "enable-jumbo", 12)) { 1706 struct option lenopts = 1707 {"max-pkt-len", required_argument, \ 1708 0, 0}; 1709 1710 printf("jumbo frame is enabled \n"); 1711 port_conf.rxmode.offloads |= 1712 DEV_RX_OFFLOAD_JUMBO_FRAME; 1713 port_conf.txmode.offloads |= 1714 DEV_TX_OFFLOAD_MULTI_SEGS; 1715 1716 /** 1717 * if no max-pkt-len 
set, use the default value 1718 * RTE_ETHER_MAX_LEN 1719 */ 1720 if (0 == getopt_long(argc, argvopt, "", 1721 &lenopts, &option_index)) { 1722 ret = parse_max_pkt_len(optarg); 1723 if ((ret < 64) || 1724 (ret > MAX_JUMBO_PKT_LEN)){ 1725 printf("invalid packet " 1726 "length\n"); 1727 print_usage(prgname); 1728 return -1; 1729 } 1730 port_conf.rxmode.max_rx_pkt_len = ret; 1731 } 1732 printf("set jumbo frame " 1733 "max packet length to %u\n", 1734 (unsigned int)port_conf.rxmode.max_rx_pkt_len); 1735 } 1736 1737 if (!strncmp(lgopts[option_index].name, 1738 CMD_LINE_OPT_PARSE_PTYPE, 1739 sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { 1740 printf("soft parse-ptype is enabled\n"); 1741 parse_ptype = 1; 1742 } 1743 1744 break; 1745 1746 default: 1747 print_usage(prgname); 1748 return -1; 1749 } 1750 } 1751 1752 if (optind >= 0) 1753 argv[optind-1] = prgname; 1754 1755 ret = optind-1; 1756 optind = 1; /* reset getopt lib */ 1757 return ret; 1758 } 1759 1760 static void 1761 print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr) 1762 { 1763 char buf[RTE_ETHER_ADDR_FMT_SIZE]; 1764 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr); 1765 printf("%s%s", name, buf); 1766 } 1767 1768 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1769 static void 1770 setup_hash(int socketid) 1771 { 1772 struct rte_hash_parameters ipv4_l3fwd_hash_params = { 1773 .name = NULL, 1774 .entries = L3FWD_HASH_ENTRIES, 1775 .key_len = sizeof(struct ipv4_5tuple), 1776 .hash_func = DEFAULT_HASH_FUNC, 1777 .hash_func_init_val = 0, 1778 }; 1779 1780 struct rte_hash_parameters ipv6_l3fwd_hash_params = { 1781 .name = NULL, 1782 .entries = L3FWD_HASH_ENTRIES, 1783 .key_len = sizeof(struct ipv6_5tuple), 1784 .hash_func = DEFAULT_HASH_FUNC, 1785 .hash_func_init_val = 0, 1786 }; 1787 1788 unsigned i; 1789 int ret; 1790 char s[64]; 1791 1792 /* create ipv4 hash */ 1793 snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); 1794 ipv4_l3fwd_hash_params.name = s; 1795 ipv4_l3fwd_hash_params.socket_id = socketid; 1796 ipv4_l3fwd_lookup_struct[socketid] = 1797 rte_hash_create(&ipv4_l3fwd_hash_params); 1798 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 1799 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 1800 "socket %d\n", socketid); 1801 1802 /* create ipv6 hash */ 1803 snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); 1804 ipv6_l3fwd_hash_params.name = s; 1805 ipv6_l3fwd_hash_params.socket_id = socketid; 1806 ipv6_l3fwd_lookup_struct[socketid] = 1807 rte_hash_create(&ipv6_l3fwd_hash_params); 1808 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 1809 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 1810 "socket %d\n", socketid); 1811 1812 1813 /* populate the ipv4 hash */ 1814 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 1815 ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid], 1816 (void *) &ipv4_l3fwd_route_array[i].key); 1817 if (ret < 0) { 1818 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 1819 "l3fwd hash on socket %d\n", i, socketid); 1820 } 1821 ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out; 1822 printf("Hash: Adding key\n"); 1823 print_ipv4_key(ipv4_l3fwd_route_array[i].key); 1824 } 1825 1826 /* populate the ipv6 hash */ 1827 for (i = 0; i < RTE_DIM(ipv6_l3fwd_route_array); i++) { 1828 ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid], 1829 (void *) &ipv6_l3fwd_route_array[i].key); 1830 if (ret < 0) { 1831 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 1832 "l3fwd hash on socket %d\n", i, socketid); 1833 } 1834 
ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out; 1835 printf("Hash: Adding key\n"); 1836 print_ipv6_key(ipv6_l3fwd_route_array[i].key); 1837 } 1838 } 1839 #endif 1840 1841 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 1842 static void 1843 setup_lpm(int socketid) 1844 { 1845 unsigned i; 1846 int ret; 1847 char s[64]; 1848 1849 /* create the LPM table */ 1850 struct rte_lpm_config lpm_ipv4_config; 1851 1852 lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; 1853 lpm_ipv4_config.number_tbl8s = 256; 1854 lpm_ipv4_config.flags = 0; 1855 1856 snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); 1857 ipv4_l3fwd_lookup_struct[socketid] = 1858 rte_lpm_create(s, socketid, &lpm_ipv4_config); 1859 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 1860 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 1861 " on socket %d\n", socketid); 1862 1863 /* populate the LPM table */ 1864 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 1865 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], 1866 ipv4_l3fwd_route_array[i].ip, 1867 ipv4_l3fwd_route_array[i].depth, 1868 ipv4_l3fwd_route_array[i].if_out); 1869 1870 if (ret < 0) { 1871 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 1872 "l3fwd LPM table on socket %d\n", 1873 i, socketid); 1874 } 1875 1876 printf("LPM: Adding route 0x%08x / %d (%d)\n", 1877 (unsigned)ipv4_l3fwd_route_array[i].ip, 1878 ipv4_l3fwd_route_array[i].depth, 1879 ipv4_l3fwd_route_array[i].if_out); 1880 } 1881 } 1882 #endif 1883 1884 static int 1885 init_mem(unsigned nb_mbuf) 1886 { 1887 struct lcore_conf *qconf; 1888 int socketid; 1889 unsigned lcore_id; 1890 char s[64]; 1891 1892 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 1893 if (rte_lcore_is_enabled(lcore_id) == 0) 1894 continue; 1895 1896 if (numa_on) 1897 socketid = rte_lcore_to_socket_id(lcore_id); 1898 else 1899 socketid = 0; 1900 1901 if (socketid >= NB_SOCKETS) { 1902 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is " 1903 "out of range %d\n", socketid, 1904 lcore_id, NB_SOCKETS); 1905 } 1906 if (pktmbuf_pool[socketid] == NULL) { 1907 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 1908 pktmbuf_pool[socketid] = 1909 rte_pktmbuf_pool_create(s, nb_mbuf, 1910 MEMPOOL_CACHE_SIZE, 0, 1911 RTE_MBUF_DEFAULT_BUF_SIZE, 1912 socketid); 1913 if (pktmbuf_pool[socketid] == NULL) 1914 rte_exit(EXIT_FAILURE, 1915 "Cannot init mbuf pool on socket %d\n", 1916 socketid); 1917 else 1918 printf("Allocated mbuf pool on socket %d\n", 1919 socketid); 1920 1921 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 1922 setup_lpm(socketid); 1923 #else 1924 setup_hash(socketid); 1925 #endif 1926 } 1927 qconf = &lcore_conf[lcore_id]; 1928 qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid]; 1929 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1930 qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid]; 1931 #endif 1932 } 1933 return 0; 1934 } 1935 1936 /* Check the link status of all ports in up to 9s, and print them finally */ 1937 static void 1938 check_all_ports_link_status(uint32_t port_mask) 1939 { 1940 #define CHECK_INTERVAL 100 /* 100ms */ 1941 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 1942 uint8_t count, all_ports_up, print_flag = 0; 1943 uint16_t portid; 1944 struct rte_eth_link link; 1945 int ret; 1946 1947 printf("\nChecking link status"); 1948 fflush(stdout); 1949 for (count = 0; count <= MAX_CHECK_TIME; count++) { 1950 all_ports_up = 1; 1951 RTE_ETH_FOREACH_DEV(portid) { 1952 if ((port_mask & (1 << portid)) == 0) 1953 continue; 1954 memset(&link, 0, sizeof(link)); 
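			/* Non-blocking link query: any port still down is
			 * polled again on the next pass of the 9 s timeout
			 * loop.
			 */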
1955 ret = rte_eth_link_get_nowait(portid, &link); 1956 if (ret < 0) { 1957 all_ports_up = 0; 1958 if (print_flag == 1) 1959 printf("Port %u link get failed: %s\n", 1960 portid, rte_strerror(-ret)); 1961 continue; 1962 } 1963 /* print link status if flag set */ 1964 if (print_flag == 1) { 1965 if (link.link_status) 1966 printf("Port %d Link Up - speed %u " 1967 "Mbps - %s\n", (uint8_t)portid, 1968 (unsigned)link.link_speed, 1969 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 1970 ("full-duplex") : ("half-duplex")); 1971 else 1972 printf("Port %d Link Down\n", 1973 (uint8_t)portid); 1974 continue; 1975 } 1976 /* clear all_ports_up flag if any link down */ 1977 if (link.link_status == ETH_LINK_DOWN) { 1978 all_ports_up = 0; 1979 break; 1980 } 1981 } 1982 /* after finally printing all link status, get out */ 1983 if (print_flag == 1) 1984 break; 1985 1986 if (all_ports_up == 0) { 1987 printf("."); 1988 fflush(stdout); 1989 rte_delay_ms(CHECK_INTERVAL); 1990 } 1991 1992 /* set the print_flag if all ports up or timeout */ 1993 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 1994 print_flag = 1; 1995 printf("done\n"); 1996 } 1997 } 1998 } 1999 2000 static int check_ptype(uint16_t portid) 2001 { 2002 int i, ret; 2003 int ptype_l3_ipv4 = 0; 2004 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2005 int ptype_l3_ipv6 = 0; 2006 #endif 2007 uint32_t ptype_mask = RTE_PTYPE_L3_MASK; 2008 2009 ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0); 2010 if (ret <= 0) 2011 return 0; 2012 2013 uint32_t ptypes[ret]; 2014 2015 ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret); 2016 for (i = 0; i < ret; ++i) { 2017 if (ptypes[i] & RTE_PTYPE_L3_IPV4) 2018 ptype_l3_ipv4 = 1; 2019 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2020 if (ptypes[i] & RTE_PTYPE_L3_IPV6) 2021 ptype_l3_ipv6 = 1; 2022 #endif 2023 } 2024 2025 if (ptype_l3_ipv4 == 0) 2026 printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid); 2027 2028 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2029 if (ptype_l3_ipv6 == 0) 2030 printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid); 2031 #endif 2032 2033 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 2034 if (ptype_l3_ipv4) 2035 #else /* APP_LOOKUP_EXACT_MATCH */ 2036 if (ptype_l3_ipv4 && ptype_l3_ipv6) 2037 #endif 2038 return 1; 2039 2040 return 0; 2041 2042 } 2043 2044 static int 2045 init_power_library(void) 2046 { 2047 enum power_management_env env; 2048 unsigned int lcore_id; 2049 int ret = 0; 2050 2051 RTE_LCORE_FOREACH(lcore_id) { 2052 /* init power management library */ 2053 ret = rte_power_init(lcore_id); 2054 if (ret) { 2055 RTE_LOG(ERR, POWER, 2056 "Library initialization failed on core %u\n", 2057 lcore_id); 2058 return ret; 2059 } 2060 /* we're not supporting the VM channel mode */ 2061 env = rte_power_get_env(); 2062 if (env != PM_ENV_ACPI_CPUFREQ && 2063 env != PM_ENV_PSTATE_CPUFREQ) { 2064 RTE_LOG(ERR, POWER, 2065 "Only ACPI and PSTATE mode are supported\n"); 2066 return -1; 2067 } 2068 } 2069 return ret; 2070 } 2071 2072 static int 2073 deinit_power_library(void) 2074 { 2075 unsigned int lcore_id; 2076 int ret = 0; 2077 2078 RTE_LCORE_FOREACH(lcore_id) { 2079 /* deinit power management library */ 2080 ret = rte_power_exit(lcore_id); 2081 if (ret) { 2082 RTE_LOG(ERR, POWER, 2083 "Library deinitialization failed on core %u\n", 2084 lcore_id); 2085 return ret; 2086 } 2087 } 2088 return ret; 2089 } 2090 2091 static void 2092 get_current_stat_values(uint64_t *values) 2093 { 2094 unsigned int lcore_id = rte_lcore_id(); 2095 struct 
static int
init_power_library(void)
{
	enum power_management_env env;
	unsigned int lcore_id;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* init power management library */
		ret = rte_power_init(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library initialization failed on core %u\n",
				lcore_id);
			return ret;
		}
		/* we're not supporting the VM channel mode */
		env = rte_power_get_env();
		if (env != PM_ENV_ACPI_CPUFREQ &&
				env != PM_ENV_PSTATE_CPUFREQ) {
			RTE_LOG(ERR, POWER,
				"Only ACPI and PSTATE mode are supported\n");
			return -1;
		}
	}
	return ret;
}

static int
deinit_power_library(void)
{
	unsigned int lcore_id;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* deinit power management library */
		ret = rte_power_exit(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library deinitialization failed on core %u\n",
				lcore_id);
			return ret;
		}
	}
	return ret;
}

static void
get_current_stat_values(uint64_t *values)
{
	unsigned int lcore_id = rte_lcore_id();
	struct lcore_conf *qconf;
	uint64_t app_eps = 0, app_fps = 0, app_br = 0;
	uint64_t count = 0;

	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		qconf = &lcore_conf[lcore_id];
		if (qconf->n_rx_queue == 0)
			continue;
		count++;
		rte_spinlock_lock(&stats[lcore_id].telemetry_lock);
		app_eps += stats[lcore_id].ep_nep[1];
		app_fps += stats[lcore_id].fp_nfp[1];
		app_br += stats[lcore_id].br;
		rte_spinlock_unlock(&stats[lcore_id].telemetry_lock);
	}

	if (count > 0) {
		values[0] = app_eps/count;
		values[1] = app_fps/count;
		values[2] = app_br/count;
	} else
		memset(values, 0, sizeof(uint64_t) * NUM_TELSTATS);
}

static void
update_telemetry(__rte_unused struct rte_timer *tim,
		__rte_unused void *arg)
{
	int ret;
	uint64_t values[NUM_TELSTATS] = {0};

	get_current_stat_values(values);
	ret = rte_metrics_update_values(RTE_METRICS_GLOBAL, telstats_index,
					values, RTE_DIM(values));
	if (ret < 0)
		RTE_LOG(WARNING, POWER, "failed to update metrics\n");
}

static int
handle_app_stats(const char *cmd __rte_unused,
		const char *params __rte_unused,
		struct rte_tel_data *d)
{
	uint64_t values[NUM_TELSTATS] = {0};
	uint32_t i;

	rte_tel_data_start_dict(d);
	get_current_stat_values(values);
	for (i = 0; i < NUM_TELSTATS; i++)
		rte_tel_data_add_dict_u64(d, telstats_strings[i].name,
				values[i]);
	return 0;
}

static void
telemetry_setup_timer(void)
{
	int lcore_id = rte_lcore_id();
	uint64_t hz = rte_get_timer_hz();
	uint64_t ticks;

	ticks = hz / TELEMETRY_INTERVALS_PER_SEC;
	rte_timer_reset_sync(&telemetry_timer,
			ticks,
			PERIODICAL,
			lcore_id,
			update_telemetry,
			NULL);
}

static void
empty_poll_setup_timer(void)
{
	int lcore_id = rte_lcore_id();
	uint64_t hz = rte_get_timer_hz();

	struct ep_params *ep_ptr = ep_params;

	ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND;

	rte_timer_reset_sync(&ep_ptr->timer0,
			ep_ptr->interval_ticks,
			PERIODICAL,
			lcore_id,
			rte_empty_poll_detection,
			(void *)ep_ptr);
}

static int
launch_timer(unsigned int lcore_id)
{
	int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms;

	RTE_SET_USED(lcore_id);

	if (rte_get_master_lcore() != lcore_id) {
		rte_panic("timer on lcore:%d which is not master core:%d\n",
				lcore_id,
				rte_get_master_lcore());
	}

	RTE_LOG(INFO, POWER, "Bring up the Timer\n");

	if (app_mode == APP_MODE_EMPTY_POLL)
		empty_poll_setup_timer();
	else
		telemetry_setup_timer();

	cycles_10ms = rte_get_timer_hz() / 100;

	while (!is_done()) {
		cur_tsc = rte_rdtsc();
		diff_tsc = cur_tsc - prev_tsc;
		if (diff_tsc > cycles_10ms) {
			rte_timer_manage();
			prev_tsc = cur_tsc;
			cycles_10ms = rte_get_timer_hz() / 100;
		}
	}

	RTE_LOG(INFO, POWER, "Timer_subsystem is done\n");

	return 0;
}
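
/*
 * main() wires everything together: EAL and application argument
 * parsing, power library setup for the legacy and empty-poll modes,
 * per-port and per-queue configuration, and finally launching the
 * per-lcore forwarding loop for the selected application mode.
 */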
int
main(int argc, char **argv)
{
	struct lcore_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	int ret;
	uint16_t nb_ports;
	uint16_t queueid;
	unsigned lcore_id;
	uint64_t hz;
	uint32_t n_tx_queue, nb_lcores;
	uint32_t dev_rxq_num, dev_txq_num;
	uint8_t nb_rx_queue, queue, socketid;
	uint16_t portid;
	const char *ptr_strings[NUM_TELSTATS];

	/* catch SIGINT and restore cpufreq governor to ondemand */
	signal(SIGINT, signal_exit_now);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
	argc -= ret;
	argv += ret;

	/* init RTE timer library to be used later */
	rte_timer_subsystem_init();

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");

	/* only legacy and empty poll mode rely on power library */
	if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) &&
			init_power_library())
		rte_exit(EXIT_FAILURE, "init_power_library failed\n");

	if (update_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "update_lcore_params failed\n");

	if (check_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");

	ret = init_lcore_rx_queues();
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");

	nb_ports = rte_eth_dev_count_avail();

	if (check_port_config() < 0)
		rte_exit(EXIT_FAILURE, "check_port_config failed\n");

	nb_lcores = rte_lcore_count();

	/* initialize all ports */
	RTE_ETH_FOREACH_DEV(portid) {
		struct rte_eth_conf local_port_conf = port_conf;
		/* not all app modes need interrupts */
		bool need_intr = app_mode == APP_MODE_LEGACY;

		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}

		/* init port */
		printf("Initializing port %d ... ", portid);
		fflush(stdout);

		ret = rte_eth_dev_info_get(portid, &dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				"Error during getting device (port %u) info: %s\n",
				portid, strerror(-ret));

		dev_rxq_num = dev_info.max_rx_queues;
		dev_txq_num = dev_info.max_tx_queues;

		nb_rx_queue = get_port_n_rx_queues(portid);
		if (nb_rx_queue > dev_rxq_num)
			rte_exit(EXIT_FAILURE,
				"Cannot configure more RX queues than the device supports: "
				"port=%d\n", portid);

		n_tx_queue = nb_lcores;
		if (n_tx_queue > dev_txq_num)
			n_tx_queue = dev_txq_num;
		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
			nb_rx_queue, (unsigned)n_tx_queue);
		/* If number of Rx queue is 0, no need to enable Rx interrupt */
		if (nb_rx_queue == 0)
			need_intr = false;

		if (need_intr)
			local_port_conf.intr_conf.rxq = 1;

		ret = rte_eth_dev_info_get(portid, &dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				"Error during getting device (port %u) info: %s\n",
				portid, strerror(-ret));

		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
			local_port_conf.txmode.offloads |=
				DEV_TX_OFFLOAD_MBUF_FAST_FREE;

		local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
			dev_info.flow_type_rss_offloads;
		if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
				port_conf.rx_adv_conf.rss_conf.rss_hf) {
			printf("Port %u modified RSS hash function based on hardware support, "
				"requested:%#"PRIx64" configured:%#"PRIx64"\n",
				portid,
				port_conf.rx_adv_conf.rss_conf.rss_hf,
				local_port_conf.rx_adv_conf.rss_conf.rss_hf);
		}

		ret = rte_eth_dev_configure(portid, nb_rx_queue,
					(uint16_t)n_tx_queue, &local_port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: "
					"err=%d, port=%d\n", ret, portid);

		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						       &nb_txd);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				"Cannot adjust number of descriptors: err=%d, port=%d\n",
				ret, portid);

		ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				"Cannot get MAC address: err=%d, port=%d\n",
				ret, portid);

		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf(", ");

		/* init memory */
		ret = init_mem(NB_MBUF);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "init_mem failed\n");

		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			/* Initialize TX buffers */
			qconf = &lcore_conf[lcore_id];
			qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
				rte_eth_dev_socket_id(portid));
			if (qconf->tx_buffer[portid] == NULL)
				rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n",
						portid);

			rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST);
		}

		/* init one TX queue per couple (lcore,port) */
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			if (queueid >= dev_txq_num)
				continue;

			if (numa_on)
				socketid = \
				(uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
			fflush(stdout);

			txconf = &dev_info.default_txconf;
			txconf->offloads = local_port_conf.txmode.offloads;
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
						socketid, txconf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_tx_queue_setup: err=%d, "
						"port=%d\n", ret, portid);

			qconf = &lcore_conf[lcore_id];
			qconf->tx_queue_id[portid] = queueid;
			queueid++;

			qconf->tx_port_id[qconf->n_tx_port] = portid;
			qconf->n_tx_port++;
		}
		printf("\n");
	}
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		if (app_mode == APP_MODE_LEGACY) {
			/* init timer structures for each enabled lcore */
			rte_timer_init(&power_timers[lcore_id]);
			hz = rte_get_timer_hz();
			rte_timer_reset(&power_timers[lcore_id],
					hz/TIMER_NUMBER_PER_SECOND,
					SINGLE, lcore_id,
					power_timer_cb, NULL);
		}
		qconf = &lcore_conf[lcore_id];
		printf("\nInitializing rx queues on lcore %u ... ", lcore_id);
		fflush(stdout);
		/* init RX queues */
		for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
			struct rte_eth_rxconf rxq_conf;

			portid = qconf->rx_queue_list[queue].port_id;
			queueid = qconf->rx_queue_list[queue].queue_id;

			if (numa_on)
				socketid = \
				(uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("rxq=%d,%d,%d ", portid, queueid, socketid);
			fflush(stdout);

			ret = rte_eth_dev_info_get(portid, &dev_info);
			if (ret != 0)
				rte_exit(EXIT_FAILURE,
					"Error during getting device (port %u) info: %s\n",
					portid, strerror(-ret));

			rxq_conf = dev_info.default_rxconf;
			rxq_conf.offloads = port_conf.rxmode.offloads;
			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
						socketid, &rxq_conf,
						pktmbuf_pool[socketid]);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_rx_queue_setup: err=%d, "
						"port=%d\n", ret, portid);

			if (parse_ptype) {
				if (add_cb_parse_ptype(portid, queueid) < 0)
					rte_exit(EXIT_FAILURE,
						"Failed to add ptype callback\n");
			} else if (!check_ptype(portid))
				rte_exit(EXIT_FAILURE,
					"PMD can not provide needed ptypes\n");
		}
	}

	printf("\n");

	/* start ports */
	RTE_ETH_FOREACH_DEV(portid) {
		if ((enabled_port_mask & (1 << portid)) == 0) {
			continue;
		}
		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
						"port=%d\n", ret, portid);
		/*
		 * If enabled, put device in promiscuous mode.
		 * This allows IO forwarding mode to forward packets
		 * to itself through 2 cross-connected ports of the
		 * target machine.
		 */
		if (promiscuous_on) {
			ret = rte_eth_promiscuous_enable(portid);
			if (ret != 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_promiscuous_enable: err=%s, port=%u\n",
					rte_strerror(-ret), portid);
		}
		/* initialize spinlock for each port */
		rte_spinlock_init(&(locks[portid]));
	}

	check_all_ports_link_status(enabled_port_mask);
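
	/*
	 * Empty-poll mode needs its detection state initialised before the
	 * forwarding loops start: either begin in the training state or use
	 * the medium/high empty-poll thresholds supplied on the command line.
	 */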
	if (app_mode == APP_MODE_EMPTY_POLL) {

		if (empty_poll_train) {
			policy.state = TRAINING;
		} else {
			policy.state = MED_NORMAL;
			policy.med_base_edpi = ep_med_edpi;
			policy.hgh_base_edpi = ep_hgh_edpi;
		}

		ret = rte_power_empty_poll_stat_init(&ep_params,
				freq_tlb,
				&policy);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "empty poll init failed");
	}

	/* launch per-lcore init on every lcore */
	if (app_mode == APP_MODE_LEGACY) {
		rte_eal_mp_remote_launch(main_legacy_loop, NULL, CALL_MASTER);
	} else if (app_mode == APP_MODE_EMPTY_POLL) {
		empty_poll_stop = false;
		rte_eal_mp_remote_launch(main_empty_poll_loop, NULL,
				SKIP_MASTER);
	} else if (app_mode == APP_MODE_TELEMETRY) {
		unsigned int i;

		/* Init metrics library */
		rte_metrics_init(rte_socket_id());
		/* Register stats with metrics library */
		for (i = 0; i < NUM_TELSTATS; i++)
			ptr_strings[i] = telstats_strings[i].name;

		ret = rte_metrics_reg_names(ptr_strings, NUM_TELSTATS);
		if (ret >= 0)
			telstats_index = ret;
		else
			rte_exit(EXIT_FAILURE, "failed to register metrics names");

		RTE_LCORE_FOREACH_SLAVE(lcore_id) {
			rte_spinlock_init(&stats[lcore_id].telemetry_lock);
		}
		rte_timer_init(&telemetry_timer);
		rte_telemetry_register_cmd("/l3fwd-power/stats",
				handle_app_stats,
				"Returns global power stats. Parameters: None");
		rte_eal_mp_remote_launch(main_telemetry_loop, NULL,
				SKIP_MASTER);
	}

	if (app_mode == APP_MODE_EMPTY_POLL || app_mode == APP_MODE_TELEMETRY)
		launch_timer(rte_lcore_id());

	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	RTE_ETH_FOREACH_DEV(portid)
	{
		if ((enabled_port_mask & (1 << portid)) == 0)
			continue;

		rte_eth_dev_stop(portid);
		rte_eth_dev_close(portid);
	}

	if (app_mode == APP_MODE_EMPTY_POLL)
		rte_power_empty_poll_stat_free();

	if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) &&
			deinit_power_library())
		rte_exit(EXIT_FAILURE, "deinit_power_library failed\n");

	if (rte_eal_cleanup() < 0)
		RTE_LOG(ERR, L3FWD_POWER, "EAL cleanup failed\n");

	return 0;
}