1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdint.h> 8 #include <inttypes.h> 9 #include <sys/types.h> 10 #include <string.h> 11 #include <sys/queue.h> 12 #include <stdarg.h> 13 #include <errno.h> 14 #include <getopt.h> 15 #include <unistd.h> 16 #include <signal.h> 17 #include <math.h> 18 19 #include <rte_common.h> 20 #include <rte_byteorder.h> 21 #include <rte_log.h> 22 #include <rte_malloc.h> 23 #include <rte_memory.h> 24 #include <rte_memcpy.h> 25 #include <rte_eal.h> 26 #include <rte_launch.h> 27 #include <rte_atomic.h> 28 #include <rte_cycles.h> 29 #include <rte_prefetch.h> 30 #include <rte_lcore.h> 31 #include <rte_per_lcore.h> 32 #include <rte_branch_prediction.h> 33 #include <rte_interrupts.h> 34 #include <rte_random.h> 35 #include <rte_debug.h> 36 #include <rte_ether.h> 37 #include <rte_ethdev.h> 38 #include <rte_mempool.h> 39 #include <rte_mbuf.h> 40 #include <rte_ip.h> 41 #include <rte_tcp.h> 42 #include <rte_udp.h> 43 #include <rte_string_fns.h> 44 #include <rte_timer.h> 45 #include <rte_power.h> 46 #include <rte_spinlock.h> 47 #include <rte_power_empty_poll.h> 48 #include <rte_metrics.h> 49 #include <rte_telemetry.h> 50 51 #include "perf_core.h" 52 #include "main.h" 53 54 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 55 56 #define MAX_PKT_BURST 32 57 58 #define MIN_ZERO_POLL_COUNT 10 59 60 /* 100 ms interval */ 61 #define TIMER_NUMBER_PER_SECOND 10 62 /* (10ms) */ 63 #define INTERVALS_PER_SECOND 100 64 /* 100000 us */ 65 #define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND) 66 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25 67 68 #define APP_LOOKUP_EXACT_MATCH 0 69 #define APP_LOOKUP_LPM 1 70 #define DO_RFC_1812_CHECKS 71 72 #ifndef APP_LOOKUP_METHOD 73 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 74 #endif 75 76 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 77 #include <rte_hash.h> 78 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 79 #include <rte_lpm.h> 80 #else 81 #error "APP_LOOKUP_METHOD set to incorrect value" 82 #endif 83 84 #ifndef IPv6_BYTES 85 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ 86 "%02x%02x:%02x%02x:%02x%02x:%02x%02x" 87 #define IPv6_BYTES(addr) \ 88 addr[0], addr[1], addr[2], addr[3], \ 89 addr[4], addr[5], addr[6], addr[7], \ 90 addr[8], addr[9], addr[10], addr[11],\ 91 addr[12], addr[13],addr[14], addr[15] 92 #endif 93 94 #define MAX_JUMBO_PKT_LEN 9600 95 96 #define IPV6_ADDR_LEN 16 97 98 #define MEMPOOL_CACHE_SIZE 256 99 100 /* 101 * This expression is used to calculate the number of mbufs needed depending on 102 * user input, taking into account memory for rx and tx hardware rings, cache 103 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 104 * NB_MBUF never goes below a minimum value of 8192. 105 */ 106 107 #define NB_MBUF RTE_MAX ( \ 108 (nb_ports*nb_rx_queue*nb_rxd + \ 109 nb_ports*nb_lcores*MAX_PKT_BURST + \ 110 nb_ports*n_tx_queue*nb_txd + \ 111 nb_lcores*MEMPOOL_CACHE_SIZE), \ 112 (unsigned)8192) 113 114 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 115 116 #define NB_SOCKETS 8 117 118 /* Configure how many packets ahead to prefetch, when reading packets */ 119 #define PREFETCH_OFFSET 3 120 121 /* 122 * Configurable number of RX/TX ring descriptors 123 */ 124 #define RTE_TEST_RX_DESC_DEFAULT 1024 125 #define RTE_TEST_TX_DESC_DEFAULT 1024 126 127 /* 128 * These two thresholds were decided on by running the training algorithm on 129 * a 2.5GHz Xeon. 
These defaults can be overridden by supplying non-zero values 130 * for the med_threshold and high_threshold parameters on the command line. 131 */ 132 #define EMPTY_POLL_MED_THRESHOLD 350000UL 133 #define EMPTY_POLL_HGH_THRESHOLD 580000UL 134 135 #define NUM_TELSTATS RTE_DIM(telstats_strings) 136 137 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 138 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 139 140 /* ethernet addresses of ports */ 141 static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 142 143 /* ethernet addresses of ports */ 144 static rte_spinlock_t locks[RTE_MAX_ETHPORTS]; 145 146 /* mask of enabled ports */ 147 static uint32_t enabled_port_mask = 0; 148 /* Ports set in promiscuous mode off by default. */ 149 static int promiscuous_on = 0; 150 /* NUMA is enabled by default. */ 151 static int numa_on = 1; 152 static bool empty_poll_stop; 153 static bool empty_poll_train; 154 volatile bool quit_signal; 155 static struct ep_params *ep_params; 156 static struct ep_policy policy; 157 static long ep_med_edpi, ep_hgh_edpi; 158 /* timer to update telemetry every 500ms */ 159 static struct rte_timer telemetry_timer; 160 161 /* stats index returned by metrics lib */ 162 int telstats_index; 163 164 struct telstats_name { 165 char name[RTE_ETH_XSTATS_NAME_SIZE]; 166 }; 167 168 /* telemetry stats to be reported */ 169 const struct telstats_name telstats_strings[] = { 170 {"empty_poll"}, 171 {"full_poll"}, 172 {"busy_percent"} 173 }; 174 175 /* core busyness in percentage */ 176 enum busy_rate { 177 ZERO = 0, 178 PARTIAL = 50, 179 FULL = 100 180 }; 181 182 /* reference poll count to measure core busyness */ 183 #define DEFAULT_COUNT 10000 184 /* 185 * reference CYCLES to be used to 186 * measure core busyness based on poll count 187 */ 188 #define MIN_CYCLES 1500000ULL 189 #define MAX_CYCLES 22000000ULL 190 191 /* (500ms) */ 192 #define TELEMETRY_INTERVALS_PER_SEC 2 193 194 static int parse_ptype; /**< Parse packet type using rx callback, and */ 195 /**< disabled by default */ 196 197 enum appmode { 198 APP_MODE_DEFAULT = 0, 199 APP_MODE_LEGACY, 200 APP_MODE_EMPTY_POLL, 201 APP_MODE_TELEMETRY, 202 APP_MODE_INTERRUPT 203 }; 204 205 enum appmode app_mode; 206 207 enum freq_scale_hint_t 208 { 209 FREQ_LOWER = -1, 210 FREQ_CURRENT = 0, 211 FREQ_HIGHER = 1, 212 FREQ_HIGHEST = 2 213 }; 214 215 struct lcore_rx_queue { 216 uint16_t port_id; 217 uint8_t queue_id; 218 enum freq_scale_hint_t freq_up_hint; 219 uint32_t zero_rx_packet_count; 220 uint32_t idle_hint; 221 } __rte_cache_aligned; 222 223 #define MAX_RX_QUEUE_PER_LCORE 16 224 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 225 #define MAX_RX_QUEUE_PER_PORT 128 226 227 #define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 228 229 230 struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; 231 static struct lcore_params lcore_params_array_default[] = { 232 {0, 0, 2}, 233 {0, 1, 2}, 234 {0, 2, 2}, 235 {1, 0, 2}, 236 {1, 1, 2}, 237 {1, 2, 2}, 238 {2, 0, 2}, 239 {3, 0, 3}, 240 {3, 1, 3}, 241 }; 242 243 struct lcore_params *lcore_params = lcore_params_array_default; 244 uint16_t nb_lcore_params = RTE_DIM(lcore_params_array_default); 245 246 static struct rte_eth_conf port_conf = { 247 .rxmode = { 248 .mq_mode = ETH_MQ_RX_RSS, 249 .max_rx_pkt_len = RTE_ETHER_MAX_LEN, 250 .split_hdr_size = 0, 251 .offloads = DEV_RX_OFFLOAD_CHECKSUM, 252 }, 253 .rx_adv_conf = { 254 .rss_conf = { 255 .rss_key = NULL, 256 .rss_hf = ETH_RSS_UDP, 257 }, 258 }, 259 .txmode = { 260 .mq_mode = ETH_MQ_TX_NONE, 261 } 262 }; 263 264 static struct rte_mempool * 
pktmbuf_pool[NB_SOCKETS]; 265 266 267 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 268 269 #ifdef RTE_ARCH_X86 270 #include <rte_hash_crc.h> 271 #define DEFAULT_HASH_FUNC rte_hash_crc 272 #else 273 #include <rte_jhash.h> 274 #define DEFAULT_HASH_FUNC rte_jhash 275 #endif 276 277 struct ipv4_5tuple { 278 uint32_t ip_dst; 279 uint32_t ip_src; 280 uint16_t port_dst; 281 uint16_t port_src; 282 uint8_t proto; 283 } __rte_packed; 284 285 struct ipv6_5tuple { 286 uint8_t ip_dst[IPV6_ADDR_LEN]; 287 uint8_t ip_src[IPV6_ADDR_LEN]; 288 uint16_t port_dst; 289 uint16_t port_src; 290 uint8_t proto; 291 } __rte_packed; 292 293 struct ipv4_l3fwd_route { 294 struct ipv4_5tuple key; 295 uint8_t if_out; 296 }; 297 298 struct ipv6_l3fwd_route { 299 struct ipv6_5tuple key; 300 uint8_t if_out; 301 }; 302 303 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 304 {{RTE_IPV4(100,10,0,1), RTE_IPV4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, 305 {{RTE_IPV4(100,20,0,2), RTE_IPV4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, 306 {{RTE_IPV4(100,30,0,3), RTE_IPV4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, 307 {{RTE_IPV4(100,40,0,4), RTE_IPV4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, 308 }; 309 310 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 311 { 312 { 313 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 314 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, 315 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 316 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, 317 1, 10, IPPROTO_UDP 318 }, 4 319 }, 320 }; 321 322 typedef struct rte_hash lookup_struct_t; 323 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 324 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 325 326 #define L3FWD_HASH_ENTRIES 1024 327 328 static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 329 static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 330 #endif 331 332 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 333 struct ipv4_l3fwd_route { 334 uint32_t ip; 335 uint8_t depth; 336 uint8_t if_out; 337 }; 338 339 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 340 {RTE_IPV4(1,1,1,0), 24, 0}, 341 {RTE_IPV4(2,1,1,0), 24, 1}, 342 {RTE_IPV4(3,1,1,0), 24, 2}, 343 {RTE_IPV4(4,1,1,0), 24, 3}, 344 {RTE_IPV4(5,1,1,0), 24, 4}, 345 {RTE_IPV4(6,1,1,0), 24, 5}, 346 {RTE_IPV4(7,1,1,0), 24, 6}, 347 {RTE_IPV4(8,1,1,0), 24, 7}, 348 }; 349 350 #define IPV4_L3FWD_LPM_MAX_RULES 1024 351 352 typedef struct rte_lpm lookup_struct_t; 353 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 354 #endif 355 356 struct lcore_conf { 357 uint16_t n_rx_queue; 358 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 359 uint16_t n_tx_port; 360 uint16_t tx_port_id[RTE_MAX_ETHPORTS]; 361 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 362 struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; 363 lookup_struct_t * ipv4_lookup_struct; 364 lookup_struct_t * ipv6_lookup_struct; 365 } __rte_cache_aligned; 366 367 struct lcore_stats { 368 /* total sleep time in ms since last frequency scaling down */ 369 uint32_t sleep_time; 370 /* number of long sleep recently */ 371 uint32_t nb_long_sleep; 372 /* freq. scaling up trend */ 373 uint32_t trend; 374 /* total packet processed recently */ 375 uint64_t nb_rx_processed; 376 /* total iterations looped recently */ 377 uint64_t nb_iteration_looped; 378 /* 379 * Represents empty and non empty polls 380 * of rte_eth_rx_burst(); 381 * ep_nep[0] holds non empty polls 382 * i.e. 0 < nb_rx <= MAX_BURST 383 * ep_nep[1] holds empty polls. 384 * i.e. 
 *	nb_rx == 0
 */
	uint64_t ep_nep[2];
	/*
	 * Represents full and empty+partial
	 * polls of rte_eth_rx_burst();
	 * fp_nfp[0] holds empty+partial polls.
	 * i.e. 0 <= nb_rx < MAX_BURST
	 * fp_nfp[1] holds full polls
	 * i.e. nb_rx == MAX_BURST
	 */
	uint64_t fp_nfp[2];
	enum busy_rate br;
	rte_spinlock_t telemetry_lock;
} __rte_cache_aligned;

static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned;
static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned;
static struct rte_timer power_timers[RTE_MAX_LCORE];

static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
static inline enum freq_scale_hint_t power_freq_scaleup_heuristic(
	unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);


/*
 * These defaults are using the max frequency index (1), a medium index (9)
 * and a typical low frequency index (14). These can be adjusted to use
 * different indexes using the relevant command line parameters.
 */
static uint8_t freq_tlb[] = {14, 9, 1};

static int is_done(void)
{
	return quit_signal;
}

/* exit signal handler */
static void
signal_exit_now(int sigtype)
{

	if (sigtype == SIGINT)
		quit_signal = true;

}

/* Frequency scale down timer callback */
static void
power_timer_cb(__rte_unused struct rte_timer *tim,
			__rte_unused void *arg)
{
	uint64_t hz;
	float sleep_time_ratio;
	unsigned lcore_id = rte_lcore_id();

	/* accumulate total execution time in us when callback is invoked */
	sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
			(float)SCALING_PERIOD;
	/**
	 * check whether the frequency needs to be scaled down a step
	 * if the core slept a lot.
	 */
	if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
		if (rte_power_freq_down)
			rte_power_freq_down(lcore_id);
	}
	else if ((unsigned)(stats[lcore_id].nb_rx_processed /
		stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
		/**
		 * scale down a step if the average number of packets
		 * per iteration is less than expected.
		 */
		if (rte_power_freq_down)
			rte_power_freq_down(lcore_id);
	}

	/**
	 * initialize another timer according to current frequency to ensure
	 * timer interval is relatively fixed.
	 */
	hz = rte_get_timer_hz();
	rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND,
				SINGLE, lcore_id, power_timer_cb, NULL);

	stats[lcore_id].nb_rx_processed = 0;
	stats[lcore_id].nb_iteration_looped = 0;

	stats[lcore_id].sleep_time = 0;
}

/* Enqueue a single packet, and send burst if queue is filled */
static inline int
send_single_packet(struct rte_mbuf *m, uint16_t port)
{
	uint32_t lcore_id;
	struct lcore_conf *qconf;

	lcore_id = rte_lcore_id();
	qconf = &lcore_conf[lcore_id];

	rte_eth_tx_buffer(port, qconf->tx_queue_id[port],
			qconf->tx_buffer[port], m);

	return 0;
}

#ifdef DO_RFC_1812_CHECKS
static inline int
is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len)
{
	/* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
	/*
	 * 1. The packet length reported by the Link Layer must be large
	 * enough to hold the minimum length legal IP datagram (20 bytes).
	 */
	if (link_len < sizeof(struct rte_ipv4_hdr))
		return -1;

	/* 2. The IP checksum must be correct.
*/ 503 /* this is checked in H/W */ 504 505 /* 506 * 3. The IP version number must be 4. If the version number is not 4 507 * then the packet may be another version of IP, such as IPng or 508 * ST-II. 509 */ 510 if (((pkt->version_ihl) >> 4) != 4) 511 return -3; 512 /* 513 * 4. The IP header length field must be large enough to hold the 514 * minimum length legal IP datagram (20 bytes = 5 words). 515 */ 516 if ((pkt->version_ihl & 0xf) < 5) 517 return -4; 518 519 /* 520 * 5. The IP total length field must be large enough to hold the IP 521 * datagram header, whose length is specified in the IP header length 522 * field. 523 */ 524 if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr)) 525 return -5; 526 527 return 0; 528 } 529 #endif 530 531 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 532 static void 533 print_ipv4_key(struct ipv4_5tuple key) 534 { 535 printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, " 536 "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src, 537 key.port_dst, key.port_src, key.proto); 538 } 539 static void 540 print_ipv6_key(struct ipv6_5tuple key) 541 { 542 printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", " 543 "port dst = %d, port src = %d, proto = %d\n", 544 IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src), 545 key.port_dst, key.port_src, key.proto); 546 } 547 548 static inline uint16_t 549 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 550 lookup_struct_t * ipv4_l3fwd_lookup_struct) 551 { 552 struct ipv4_5tuple key; 553 struct rte_tcp_hdr *tcp; 554 struct rte_udp_hdr *udp; 555 int ret = 0; 556 557 key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); 558 key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); 559 key.proto = ipv4_hdr->next_proto_id; 560 561 switch (ipv4_hdr->next_proto_id) { 562 case IPPROTO_TCP: 563 tcp = (struct rte_tcp_hdr *)((unsigned char *)ipv4_hdr + 564 sizeof(struct rte_ipv4_hdr)); 565 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 566 key.port_src = rte_be_to_cpu_16(tcp->src_port); 567 break; 568 569 case IPPROTO_UDP: 570 udp = (struct rte_udp_hdr *)((unsigned char *)ipv4_hdr + 571 sizeof(struct rte_ipv4_hdr)); 572 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 573 key.port_src = rte_be_to_cpu_16(udp->src_port); 574 break; 575 576 default: 577 key.port_dst = 0; 578 key.port_src = 0; 579 break; 580 } 581 582 /* Find destination port */ 583 ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); 584 return ((ret < 0) ? 
portid : ipv4_l3fwd_out_if[ret]); 585 } 586 587 static inline uint16_t 588 get_ipv6_dst_port(struct rte_ipv6_hdr *ipv6_hdr, uint16_t portid, 589 lookup_struct_t *ipv6_l3fwd_lookup_struct) 590 { 591 struct ipv6_5tuple key; 592 struct rte_tcp_hdr *tcp; 593 struct rte_udp_hdr *udp; 594 int ret = 0; 595 596 memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); 597 memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); 598 599 key.proto = ipv6_hdr->proto; 600 601 switch (ipv6_hdr->proto) { 602 case IPPROTO_TCP: 603 tcp = (struct rte_tcp_hdr *)((unsigned char *) ipv6_hdr + 604 sizeof(struct rte_ipv6_hdr)); 605 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 606 key.port_src = rte_be_to_cpu_16(tcp->src_port); 607 break; 608 609 case IPPROTO_UDP: 610 udp = (struct rte_udp_hdr *)((unsigned char *) ipv6_hdr + 611 sizeof(struct rte_ipv6_hdr)); 612 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 613 key.port_src = rte_be_to_cpu_16(udp->src_port); 614 break; 615 616 default: 617 key.port_dst = 0; 618 key.port_src = 0; 619 break; 620 } 621 622 /* Find destination port */ 623 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 624 return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]); 625 } 626 #endif 627 628 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 629 static inline uint16_t 630 get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid, 631 lookup_struct_t *ipv4_l3fwd_lookup_struct) 632 { 633 uint32_t next_hop; 634 635 return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, 636 rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? 637 next_hop : portid); 638 } 639 #endif 640 641 static inline void 642 parse_ptype_one(struct rte_mbuf *m) 643 { 644 struct rte_ether_hdr *eth_hdr; 645 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 646 uint16_t ether_type; 647 648 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 649 ether_type = eth_hdr->ether_type; 650 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) 651 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 652 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) 653 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 654 655 m->packet_type = packet_type; 656 } 657 658 static uint16_t 659 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused, 660 struct rte_mbuf *pkts[], uint16_t nb_pkts, 661 uint16_t max_pkts __rte_unused, 662 void *user_param __rte_unused) 663 { 664 unsigned int i; 665 666 for (i = 0; i < nb_pkts; ++i) 667 parse_ptype_one(pkts[i]); 668 669 return nb_pkts; 670 } 671 672 static int 673 add_cb_parse_ptype(uint16_t portid, uint16_t queueid) 674 { 675 printf("Port %d: softly parse packet type info\n", portid); 676 if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL)) 677 return 0; 678 679 printf("Failed to add rx callback: port=%d\n", portid); 680 return -1; 681 } 682 683 static inline void 684 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid, 685 struct lcore_conf *qconf) 686 { 687 struct rte_ether_hdr *eth_hdr; 688 struct rte_ipv4_hdr *ipv4_hdr; 689 void *d_addr_bytes; 690 uint16_t dst_port; 691 692 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 693 694 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 695 /* Handle IPv4 headers.*/ 696 ipv4_hdr = 697 rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 698 sizeof(struct rte_ether_hdr)); 699 700 #ifdef DO_RFC_1812_CHECKS 701 /* Check to make sure the packet is valid (RFC1812) */ 702 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { 703 rte_pktmbuf_free(m); 704 return; 705 } 706 #endif 707 708 dst_port = get_ipv4_dst_port(ipv4_hdr, 
				portid, qconf->ipv4_lookup_struct);
		if (dst_port >= RTE_MAX_ETHPORTS ||
				(enabled_port_mask & 1 << dst_port) == 0)
			dst_port = portid;

		/* 02:00:00:00:00:xx */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) =
			0x000000000002 + ((uint64_t)dst_port << 40);

#ifdef DO_RFC_1812_CHECKS
		/* Update time to live and header checksum */
		--(ipv4_hdr->time_to_live);
		++(ipv4_hdr->hdr_checksum);
#endif

		/* src addr */
		rte_ether_addr_copy(&ports_eth_addr[dst_port],
				&eth_hdr->s_addr);

		send_single_packet(m, dst_port);
	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
		/* Handle IPv6 headers.*/
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		struct rte_ipv6_hdr *ipv6_hdr;

		ipv6_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
						sizeof(struct rte_ether_hdr));

		dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
					qconf->ipv6_lookup_struct);

		if (dst_port >= RTE_MAX_ETHPORTS ||
				(enabled_port_mask & 1 << dst_port) == 0)
			dst_port = portid;

		/* 02:00:00:00:00:xx */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) =
			0x000000000002 + ((uint64_t)dst_port << 40);

		/* src addr */
		rte_ether_addr_copy(&ports_eth_addr[dst_port],
				&eth_hdr->s_addr);

		send_single_packet(m, dst_port);
#else
		/* We don't currently handle IPv6 packets in LPM mode. */
		rte_pktmbuf_free(m);
#endif
	} else
		rte_pktmbuf_free(m);

}

#define MINIMUM_SLEEP_TIME 1
#define SUSPEND_THRESHOLD 300

static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
	/* If zero count is below SUSPEND_THRESHOLD (300), sleep 1 us */
	if (zero_rx_packet_count < SUSPEND_THRESHOLD)
		return MINIMUM_SLEEP_TIME;
	/* Otherwise sleep for SUSPEND_THRESHOLD (300) us, enough to cover
	 * the latency of switching from C3/C6 back to C0
	 */
	else
		return SUSPEND_THRESHOLD;
}

static inline enum freq_scale_hint_t
power_freq_scaleup_heuristic(unsigned lcore_id,
			     uint16_t port_id,
			     uint16_t queue_id)
{
	uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id);
	/**
	 * HW Rx queue size is 128 by default, Rx burst read at maximum
	 * 32 entries per iteration
	 */
#define FREQ_GEAR1_RX_PACKET_THRESHOLD	MAX_PKT_BURST
#define FREQ_GEAR2_RX_PACKET_THRESHOLD	(MAX_PKT_BURST*2)
#define FREQ_GEAR3_RX_PACKET_THRESHOLD	(MAX_PKT_BURST*3)
#define FREQ_UP_TREND1_ACC	1
#define FREQ_UP_TREND2_ACC	100
#define FREQ_UP_THRESHOLD	10000

	if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHEST;
	} else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD))
		stats[lcore_id].trend += FREQ_UP_TREND2_ACC;
	else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD))
		stats[lcore_id].trend += FREQ_UP_TREND1_ACC;

	if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHER;
	}

	return FREQ_CURRENT;
}

/**
 * force polling thread sleep until one-shot rx interrupt triggers
 * @param num
 *  Number of Rx queues this lcore waits on.
 * @param lcore
 *  Lcore id.
 * @return
 *  0 on success
 */
static int
sleep_until_rx_interrupt(int num, int lcore)
{
	/*
	 * we want to track when we are woken up by traffic so that we can go
	 * back to sleep again without log spamming.
Avoid cache line sharing 829 * to prevent threads stepping on each others' toes. 830 */ 831 static struct { 832 bool wakeup; 833 } __rte_cache_aligned status[RTE_MAX_LCORE]; 834 struct rte_epoll_event event[num]; 835 int n, i; 836 uint16_t port_id; 837 uint8_t queue_id; 838 void *data; 839 840 if (status[lcore].wakeup) { 841 RTE_LOG(INFO, L3FWD_POWER, 842 "lcore %u sleeps until interrupt triggers\n", 843 rte_lcore_id()); 844 } 845 846 n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, 10); 847 for (i = 0; i < n; i++) { 848 data = event[i].epdata.data; 849 port_id = ((uintptr_t)data) >> CHAR_BIT; 850 queue_id = ((uintptr_t)data) & 851 RTE_LEN2MASK(CHAR_BIT, uint8_t); 852 RTE_LOG(INFO, L3FWD_POWER, 853 "lcore %u is waked up from rx interrupt on" 854 " port %d queue %d\n", 855 rte_lcore_id(), port_id, queue_id); 856 } 857 status[lcore].wakeup = n != 0; 858 859 return 0; 860 } 861 862 static void turn_on_off_intr(struct lcore_conf *qconf, bool on) 863 { 864 int i; 865 struct lcore_rx_queue *rx_queue; 866 uint8_t queue_id; 867 uint16_t port_id; 868 869 for (i = 0; i < qconf->n_rx_queue; ++i) { 870 rx_queue = &(qconf->rx_queue_list[i]); 871 port_id = rx_queue->port_id; 872 queue_id = rx_queue->queue_id; 873 874 rte_spinlock_lock(&(locks[port_id])); 875 if (on) 876 rte_eth_dev_rx_intr_enable(port_id, queue_id); 877 else 878 rte_eth_dev_rx_intr_disable(port_id, queue_id); 879 rte_spinlock_unlock(&(locks[port_id])); 880 } 881 } 882 883 static int event_register(struct lcore_conf *qconf) 884 { 885 struct lcore_rx_queue *rx_queue; 886 uint8_t queueid; 887 uint16_t portid; 888 uint32_t data; 889 int ret; 890 int i; 891 892 for (i = 0; i < qconf->n_rx_queue; ++i) { 893 rx_queue = &(qconf->rx_queue_list[i]); 894 portid = rx_queue->port_id; 895 queueid = rx_queue->queue_id; 896 data = portid << CHAR_BIT | queueid; 897 898 ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid, 899 RTE_EPOLL_PER_THREAD, 900 RTE_INTR_EVENT_ADD, 901 (void *)((uintptr_t)data)); 902 if (ret) 903 return ret; 904 } 905 906 return 0; 907 } 908 909 /* main processing loop */ 910 static int main_intr_loop(__rte_unused void *dummy) 911 { 912 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 913 unsigned int lcore_id; 914 uint64_t prev_tsc, diff_tsc, cur_tsc; 915 int i, j, nb_rx; 916 uint8_t queueid; 917 uint16_t portid; 918 struct lcore_conf *qconf; 919 struct lcore_rx_queue *rx_queue; 920 uint32_t lcore_rx_idle_count = 0; 921 uint32_t lcore_idle_hint = 0; 922 int intr_en = 0; 923 924 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 925 US_PER_S * BURST_TX_DRAIN_US; 926 927 prev_tsc = 0; 928 929 lcore_id = rte_lcore_id(); 930 qconf = &lcore_conf[lcore_id]; 931 932 if (qconf->n_rx_queue == 0) { 933 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 934 lcore_id); 935 return 0; 936 } 937 938 RTE_LOG(INFO, L3FWD_POWER, "entering main interrupt loop on lcore %u\n", 939 lcore_id); 940 941 for (i = 0; i < qconf->n_rx_queue; i++) { 942 portid = qconf->rx_queue_list[i].port_id; 943 queueid = qconf->rx_queue_list[i].queue_id; 944 RTE_LOG(INFO, L3FWD_POWER, 945 " -- lcoreid=%u portid=%u rxqueueid=%hhu\n", 946 lcore_id, portid, queueid); 947 } 948 949 /* add into event wait list */ 950 if (event_register(qconf) == 0) 951 intr_en = 1; 952 else 953 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 954 955 while (!is_done()) { 956 stats[lcore_id].nb_iteration_looped++; 957 958 cur_tsc = rte_rdtsc(); 959 960 /* 961 * TX burst queue drain 962 */ 963 diff_tsc = cur_tsc - prev_tsc; 964 if (unlikely(diff_tsc > drain_tsc)) { 965 
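			/*
			 * For example, with a 2.5 GHz TSC (a value assumed
			 * here purely for illustration), drain_tsc works out
			 * to 2,500,000,000 / 1,000,000 * 100 = 250,000
			 * cycles, i.e. packets held by rte_eth_tx_buffer()
			 * wait at most roughly BURST_TX_DRAIN_US (100 us)
			 * before being flushed below.
			 */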
for (i = 0; i < qconf->n_tx_port; ++i) { 966 portid = qconf->tx_port_id[i]; 967 rte_eth_tx_buffer_flush(portid, 968 qconf->tx_queue_id[portid], 969 qconf->tx_buffer[portid]); 970 } 971 prev_tsc = cur_tsc; 972 } 973 974 start_rx: 975 /* 976 * Read packet from RX queues 977 */ 978 lcore_rx_idle_count = 0; 979 for (i = 0; i < qconf->n_rx_queue; ++i) { 980 rx_queue = &(qconf->rx_queue_list[i]); 981 rx_queue->idle_hint = 0; 982 portid = rx_queue->port_id; 983 queueid = rx_queue->queue_id; 984 985 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 986 MAX_PKT_BURST); 987 988 stats[lcore_id].nb_rx_processed += nb_rx; 989 if (unlikely(nb_rx == 0)) { 990 /** 991 * no packet received from rx queue, try to 992 * sleep for a while forcing CPU enter deeper 993 * C states. 994 */ 995 rx_queue->zero_rx_packet_count++; 996 997 if (rx_queue->zero_rx_packet_count <= 998 MIN_ZERO_POLL_COUNT) 999 continue; 1000 1001 rx_queue->idle_hint = power_idle_heuristic( 1002 rx_queue->zero_rx_packet_count); 1003 lcore_rx_idle_count++; 1004 } else { 1005 rx_queue->zero_rx_packet_count = 0; 1006 } 1007 1008 /* Prefetch first packets */ 1009 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1010 rte_prefetch0(rte_pktmbuf_mtod( 1011 pkts_burst[j], void *)); 1012 } 1013 1014 /* Prefetch and forward already prefetched packets */ 1015 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1016 rte_prefetch0(rte_pktmbuf_mtod( 1017 pkts_burst[j + PREFETCH_OFFSET], 1018 void *)); 1019 l3fwd_simple_forward( 1020 pkts_burst[j], portid, qconf); 1021 } 1022 1023 /* Forward remaining prefetched packets */ 1024 for (; j < nb_rx; j++) { 1025 l3fwd_simple_forward( 1026 pkts_burst[j], portid, qconf); 1027 } 1028 } 1029 1030 if (unlikely(lcore_rx_idle_count == qconf->n_rx_queue)) { 1031 /** 1032 * All Rx queues empty in recent consecutive polls, 1033 * sleep in a conservative manner, meaning sleep as 1034 * less as possible. 1035 */ 1036 for (i = 1, 1037 lcore_idle_hint = qconf->rx_queue_list[0].idle_hint; 1038 i < qconf->n_rx_queue; ++i) { 1039 rx_queue = &(qconf->rx_queue_list[i]); 1040 if (rx_queue->idle_hint < lcore_idle_hint) 1041 lcore_idle_hint = rx_queue->idle_hint; 1042 } 1043 1044 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1045 /** 1046 * execute "pause" instruction to avoid context 1047 * switch which generally take hundred of 1048 * microseconds for short sleep. 
1049 */ 1050 rte_delay_us(lcore_idle_hint); 1051 else { 1052 /* suspend until rx interrupt triggers */ 1053 if (intr_en) { 1054 turn_on_off_intr(qconf, 1); 1055 sleep_until_rx_interrupt( 1056 qconf->n_rx_queue, 1057 lcore_id); 1058 turn_on_off_intr(qconf, 0); 1059 /** 1060 * start receiving packets immediately 1061 */ 1062 if (likely(!is_done())) 1063 goto start_rx; 1064 } 1065 } 1066 stats[lcore_id].sleep_time += lcore_idle_hint; 1067 } 1068 } 1069 1070 return 0; 1071 } 1072 1073 /* main processing loop */ 1074 static int 1075 main_telemetry_loop(__rte_unused void *dummy) 1076 { 1077 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1078 unsigned int lcore_id; 1079 uint64_t prev_tsc, diff_tsc, cur_tsc, prev_tel_tsc; 1080 int i, j, nb_rx; 1081 uint8_t queueid; 1082 uint16_t portid; 1083 struct lcore_conf *qconf; 1084 struct lcore_rx_queue *rx_queue; 1085 uint64_t ep_nep[2] = {0}, fp_nfp[2] = {0}; 1086 uint64_t poll_count; 1087 enum busy_rate br; 1088 1089 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / 1090 US_PER_S * BURST_TX_DRAIN_US; 1091 1092 poll_count = 0; 1093 prev_tsc = 0; 1094 prev_tel_tsc = 0; 1095 1096 lcore_id = rte_lcore_id(); 1097 qconf = &lcore_conf[lcore_id]; 1098 1099 if (qconf->n_rx_queue == 0) { 1100 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1101 lcore_id); 1102 return 0; 1103 } 1104 1105 RTE_LOG(INFO, L3FWD_POWER, "entering main telemetry loop on lcore %u\n", 1106 lcore_id); 1107 1108 for (i = 0; i < qconf->n_rx_queue; i++) { 1109 portid = qconf->rx_queue_list[i].port_id; 1110 queueid = qconf->rx_queue_list[i].queue_id; 1111 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1112 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1113 } 1114 1115 while (!is_done()) { 1116 1117 cur_tsc = rte_rdtsc(); 1118 /* 1119 * TX burst queue drain 1120 */ 1121 diff_tsc = cur_tsc - prev_tsc; 1122 if (unlikely(diff_tsc > drain_tsc)) { 1123 for (i = 0; i < qconf->n_tx_port; ++i) { 1124 portid = qconf->tx_port_id[i]; 1125 rte_eth_tx_buffer_flush(portid, 1126 qconf->tx_queue_id[portid], 1127 qconf->tx_buffer[portid]); 1128 } 1129 prev_tsc = cur_tsc; 1130 } 1131 1132 /* 1133 * Read packet from RX queues 1134 */ 1135 for (i = 0; i < qconf->n_rx_queue; ++i) { 1136 rx_queue = &(qconf->rx_queue_list[i]); 1137 portid = rx_queue->port_id; 1138 queueid = rx_queue->queue_id; 1139 1140 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1141 MAX_PKT_BURST); 1142 ep_nep[nb_rx == 0]++; 1143 fp_nfp[nb_rx == MAX_PKT_BURST]++; 1144 poll_count++; 1145 if (unlikely(nb_rx == 0)) 1146 continue; 1147 1148 /* Prefetch first packets */ 1149 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1150 rte_prefetch0(rte_pktmbuf_mtod( 1151 pkts_burst[j], void *)); 1152 } 1153 1154 /* Prefetch and forward already prefetched packets */ 1155 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1156 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1157 j + PREFETCH_OFFSET], void *)); 1158 l3fwd_simple_forward(pkts_burst[j], portid, 1159 qconf); 1160 } 1161 1162 /* Forward remaining prefetched packets */ 1163 for (; j < nb_rx; j++) { 1164 l3fwd_simple_forward(pkts_burst[j], portid, 1165 qconf); 1166 } 1167 } 1168 if (unlikely(poll_count >= DEFAULT_COUNT)) { 1169 diff_tsc = cur_tsc - prev_tel_tsc; 1170 if (diff_tsc >= MAX_CYCLES) { 1171 br = FULL; 1172 } else if (diff_tsc > MIN_CYCLES && 1173 diff_tsc < MAX_CYCLES) { 1174 br = (diff_tsc * 100) / MAX_CYCLES; 1175 } else { 1176 br = ZERO; 1177 } 1178 poll_count = 0; 1179 prev_tel_tsc = cur_tsc; 1180 /* update stats for telemetry */ 1181 
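			/*
			 * Example of the busy_percent calculation above,
			 * with assumed illustrative numbers: if the core
			 * needed diff_tsc = 11,000,000 cycles to complete
			 * DEFAULT_COUNT (10,000) polls, then
			 * br = 11,000,000 * 100 / MAX_CYCLES (22,000,000)
			 * = 50, i.e. roughly half busy; at or above
			 * MAX_CYCLES the core is reported as FULL (100),
			 * below MIN_CYCLES as ZERO.
			 */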
rte_spinlock_lock(&stats[lcore_id].telemetry_lock); 1182 stats[lcore_id].ep_nep[0] = ep_nep[0]; 1183 stats[lcore_id].ep_nep[1] = ep_nep[1]; 1184 stats[lcore_id].fp_nfp[0] = fp_nfp[0]; 1185 stats[lcore_id].fp_nfp[1] = fp_nfp[1]; 1186 stats[lcore_id].br = br; 1187 rte_spinlock_unlock(&stats[lcore_id].telemetry_lock); 1188 } 1189 } 1190 1191 return 0; 1192 } 1193 /* main processing loop */ 1194 static int 1195 main_empty_poll_loop(__rte_unused void *dummy) 1196 { 1197 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1198 unsigned int lcore_id; 1199 uint64_t prev_tsc, diff_tsc, cur_tsc; 1200 int i, j, nb_rx; 1201 uint8_t queueid; 1202 uint16_t portid; 1203 struct lcore_conf *qconf; 1204 struct lcore_rx_queue *rx_queue; 1205 1206 const uint64_t drain_tsc = 1207 (rte_get_tsc_hz() + US_PER_S - 1) / 1208 US_PER_S * BURST_TX_DRAIN_US; 1209 1210 prev_tsc = 0; 1211 1212 lcore_id = rte_lcore_id(); 1213 qconf = &lcore_conf[lcore_id]; 1214 1215 if (qconf->n_rx_queue == 0) { 1216 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 1217 lcore_id); 1218 return 0; 1219 } 1220 1221 for (i = 0; i < qconf->n_rx_queue; i++) { 1222 portid = qconf->rx_queue_list[i].port_id; 1223 queueid = qconf->rx_queue_list[i].queue_id; 1224 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1225 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1226 } 1227 1228 while (!is_done()) { 1229 stats[lcore_id].nb_iteration_looped++; 1230 1231 cur_tsc = rte_rdtsc(); 1232 /* 1233 * TX burst queue drain 1234 */ 1235 diff_tsc = cur_tsc - prev_tsc; 1236 if (unlikely(diff_tsc > drain_tsc)) { 1237 for (i = 0; i < qconf->n_tx_port; ++i) { 1238 portid = qconf->tx_port_id[i]; 1239 rte_eth_tx_buffer_flush(portid, 1240 qconf->tx_queue_id[portid], 1241 qconf->tx_buffer[portid]); 1242 } 1243 prev_tsc = cur_tsc; 1244 } 1245 1246 /* 1247 * Read packet from RX queues 1248 */ 1249 for (i = 0; i < qconf->n_rx_queue; ++i) { 1250 rx_queue = &(qconf->rx_queue_list[i]); 1251 rx_queue->idle_hint = 0; 1252 portid = rx_queue->port_id; 1253 queueid = rx_queue->queue_id; 1254 1255 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1256 MAX_PKT_BURST); 1257 1258 stats[lcore_id].nb_rx_processed += nb_rx; 1259 1260 if (nb_rx == 0) { 1261 1262 rte_power_empty_poll_stat_update(lcore_id); 1263 1264 continue; 1265 } else { 1266 rte_power_poll_stat_update(lcore_id, nb_rx); 1267 } 1268 1269 1270 /* Prefetch first packets */ 1271 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1272 rte_prefetch0(rte_pktmbuf_mtod( 1273 pkts_burst[j], void *)); 1274 } 1275 1276 /* Prefetch and forward already prefetched packets */ 1277 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1278 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1279 j + PREFETCH_OFFSET], 1280 void *)); 1281 l3fwd_simple_forward(pkts_burst[j], portid, 1282 qconf); 1283 } 1284 1285 /* Forward remaining prefetched packets */ 1286 for (; j < nb_rx; j++) { 1287 l3fwd_simple_forward(pkts_burst[j], portid, 1288 qconf); 1289 } 1290 1291 } 1292 1293 } 1294 1295 return 0; 1296 } 1297 /* main processing loop */ 1298 static int 1299 main_legacy_loop(__rte_unused void *dummy) 1300 { 1301 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1302 unsigned lcore_id; 1303 uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz; 1304 uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power; 1305 int i, j, nb_rx; 1306 uint8_t queueid; 1307 uint16_t portid; 1308 struct lcore_conf *qconf; 1309 struct lcore_rx_queue *rx_queue; 1310 enum freq_scale_hint_t lcore_scaleup_hint; 1311 uint32_t lcore_rx_idle_count = 0; 1312 uint32_t 
lcore_idle_hint = 0; 1313 int intr_en = 0; 1314 1315 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; 1316 1317 prev_tsc = 0; 1318 hz = rte_get_timer_hz(); 1319 tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND; 1320 1321 lcore_id = rte_lcore_id(); 1322 qconf = &lcore_conf[lcore_id]; 1323 1324 if (qconf->n_rx_queue == 0) { 1325 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); 1326 return 0; 1327 } 1328 1329 RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id); 1330 1331 for (i = 0; i < qconf->n_rx_queue; i++) { 1332 portid = qconf->rx_queue_list[i].port_id; 1333 queueid = qconf->rx_queue_list[i].queue_id; 1334 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1335 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1336 } 1337 1338 /* add into event wait list */ 1339 if (event_register(qconf) == 0) 1340 intr_en = 1; 1341 else 1342 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 1343 1344 while (!is_done()) { 1345 stats[lcore_id].nb_iteration_looped++; 1346 1347 cur_tsc = rte_rdtsc(); 1348 cur_tsc_power = cur_tsc; 1349 1350 /* 1351 * TX burst queue drain 1352 */ 1353 diff_tsc = cur_tsc - prev_tsc; 1354 if (unlikely(diff_tsc > drain_tsc)) { 1355 for (i = 0; i < qconf->n_tx_port; ++i) { 1356 portid = qconf->tx_port_id[i]; 1357 rte_eth_tx_buffer_flush(portid, 1358 qconf->tx_queue_id[portid], 1359 qconf->tx_buffer[portid]); 1360 } 1361 prev_tsc = cur_tsc; 1362 } 1363 1364 diff_tsc_power = cur_tsc_power - prev_tsc_power; 1365 if (diff_tsc_power > tim_res_tsc) { 1366 rte_timer_manage(); 1367 prev_tsc_power = cur_tsc_power; 1368 } 1369 1370 start_rx: 1371 /* 1372 * Read packet from RX queues 1373 */ 1374 lcore_scaleup_hint = FREQ_CURRENT; 1375 lcore_rx_idle_count = 0; 1376 for (i = 0; i < qconf->n_rx_queue; ++i) { 1377 rx_queue = &(qconf->rx_queue_list[i]); 1378 rx_queue->idle_hint = 0; 1379 portid = rx_queue->port_id; 1380 queueid = rx_queue->queue_id; 1381 1382 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1383 MAX_PKT_BURST); 1384 1385 stats[lcore_id].nb_rx_processed += nb_rx; 1386 if (unlikely(nb_rx == 0)) { 1387 /** 1388 * no packet received from rx queue, try to 1389 * sleep for a while forcing CPU enter deeper 1390 * C states. 1391 */ 1392 rx_queue->zero_rx_packet_count++; 1393 1394 if (rx_queue->zero_rx_packet_count <= 1395 MIN_ZERO_POLL_COUNT) 1396 continue; 1397 1398 rx_queue->idle_hint = power_idle_heuristic(\ 1399 rx_queue->zero_rx_packet_count); 1400 lcore_rx_idle_count++; 1401 } else { 1402 rx_queue->zero_rx_packet_count = 0; 1403 1404 /** 1405 * do not scale up frequency immediately as 1406 * user to kernel space communication is costly 1407 * which might impact packet I/O for received 1408 * packets. 
1409 */ 1410 rx_queue->freq_up_hint = 1411 power_freq_scaleup_heuristic(lcore_id, 1412 portid, queueid); 1413 } 1414 1415 /* Prefetch first packets */ 1416 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1417 rte_prefetch0(rte_pktmbuf_mtod( 1418 pkts_burst[j], void *)); 1419 } 1420 1421 /* Prefetch and forward already prefetched packets */ 1422 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1423 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1424 j + PREFETCH_OFFSET], void *)); 1425 l3fwd_simple_forward(pkts_burst[j], portid, 1426 qconf); 1427 } 1428 1429 /* Forward remaining prefetched packets */ 1430 for (; j < nb_rx; j++) { 1431 l3fwd_simple_forward(pkts_burst[j], portid, 1432 qconf); 1433 } 1434 } 1435 1436 if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) { 1437 for (i = 1, lcore_scaleup_hint = 1438 qconf->rx_queue_list[0].freq_up_hint; 1439 i < qconf->n_rx_queue; ++i) { 1440 rx_queue = &(qconf->rx_queue_list[i]); 1441 if (rx_queue->freq_up_hint > 1442 lcore_scaleup_hint) 1443 lcore_scaleup_hint = 1444 rx_queue->freq_up_hint; 1445 } 1446 1447 if (lcore_scaleup_hint == FREQ_HIGHEST) { 1448 if (rte_power_freq_max) 1449 rte_power_freq_max(lcore_id); 1450 } else if (lcore_scaleup_hint == FREQ_HIGHER) { 1451 if (rte_power_freq_up) 1452 rte_power_freq_up(lcore_id); 1453 } 1454 } else { 1455 /** 1456 * All Rx queues empty in recent consecutive polls, 1457 * sleep in a conservative manner, meaning sleep as 1458 * less as possible. 1459 */ 1460 for (i = 1, lcore_idle_hint = 1461 qconf->rx_queue_list[0].idle_hint; 1462 i < qconf->n_rx_queue; ++i) { 1463 rx_queue = &(qconf->rx_queue_list[i]); 1464 if (rx_queue->idle_hint < lcore_idle_hint) 1465 lcore_idle_hint = rx_queue->idle_hint; 1466 } 1467 1468 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1469 /** 1470 * execute "pause" instruction to avoid context 1471 * switch which generally take hundred of 1472 * microseconds for short sleep. 
				 */
				rte_delay_us(lcore_idle_hint);
			else {
				/* suspend until rx interrupt triggers */
				if (intr_en) {
					turn_on_off_intr(qconf, 1);
					sleep_until_rx_interrupt(
							qconf->n_rx_queue,
							lcore_id);
					turn_on_off_intr(qconf, 0);
					/**
					 * start receiving packets immediately
					 */
					if (likely(!is_done()))
						goto start_rx;
				}
			}
			stats[lcore_id].sleep_time += lcore_idle_hint;
		}
	}

	return 0;
}

static int
check_lcore_params(void)
{
	uint8_t queue, lcore;
	uint16_t i;
	int socketid;

	for (i = 0; i < nb_lcore_params; ++i) {
		queue = lcore_params[i].queue_id;
		if (queue >= MAX_RX_QUEUE_PER_PORT) {
			printf("invalid queue number: %hhu\n", queue);
			return -1;
		}
		lcore = lcore_params[i].lcore_id;
		if (!rte_lcore_is_enabled(lcore)) {
			printf("error: lcore %hhu is not enabled in lcore "
							"mask\n", lcore);
			return -1;
		}
		socketid = rte_lcore_to_socket_id(lcore);
		if ((socketid != 0) && (numa_on == 0)) {
			printf("warning: lcore %hhu is on socket %d with numa "
						"off\n", lcore, socketid);
		}
		if (app_mode == APP_MODE_TELEMETRY && lcore == rte_lcore_id()) {
			printf("cannot enable main core %d in config for telemetry mode\n",
				rte_lcore_id());
			return -1;
		}
	}
	return 0;
}

static int
check_port_config(void)
{
	unsigned portid;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		portid = lcore_params[i].port_id;
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("port %u is not enabled in port mask\n",
				portid);
			return -1;
		}
		if (!rte_eth_dev_is_valid_port(portid)) {
			printf("port %u is not present on the board\n",
				portid);
			return -1;
		}
	}
	return 0;
}

static uint8_t
get_port_n_rx_queues(const uint16_t port)
{
	int queue = -1;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		if (lcore_params[i].port_id == port &&
				lcore_params[i].queue_id > queue)
			queue = lcore_params[i].queue_id;
	}
	return (uint8_t)(++queue);
}

static int
init_lcore_rx_queues(void)
{
	uint16_t i, nb_rx_queue;
	uint8_t lcore;

	for (i = 0; i < nb_lcore_params; ++i) {
		lcore = lcore_params[i].lcore_id;
		nb_rx_queue = lcore_conf[lcore].n_rx_queue;
		if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
			printf("error: too many queues (%u) for lcore: %u\n",
				(unsigned)nb_rx_queue + 1, (unsigned)lcore);
			return -1;
		} else {
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
				lcore_params[i].port_id;
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
				lcore_params[i].queue_id;
			lcore_conf[lcore].n_rx_queue++;
		}
	}
	return 0;
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK -P"
		" [--config (port,queue,lcore)[,(port,queue,lcore)]]"
		" [--high-perf-cores CORELIST]"
		" [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index)]]"
		" [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
		" -p PORTMASK: hexadecimal bitmask of ports to configure\n"
		" -P: enable promiscuous mode\n"
		" --config (port,queue,lcore): rx queues configuration\n"
		" --high-perf-cores CORELIST: list of high performance cores\n"
		" --perf-config: similar to --config, cores specified as indices"
		" for bins containing high or regular performance cores\n"
		" --no-numa: optional, disable numa awareness\n"
		" --enable-jumbo: enable jumbo frames;"
		" the maximum packet length PKTLEN is given in decimal (64-9600)\n"
		" --parse-ptype: parse packet type by software\n"
		" --legacy: use legacy interrupt-based scaling\n"
		" --empty-poll: enable empty poll detection,"
		" followed by (training_flag, med_threshold, high_threshold)\n"
		" --telemetry: enable telemetry mode, to update"
		" empty polls, full polls, and core busyness to telemetry\n"
		" --interrupt-only: enable interrupt-only mode\n",
		prgname);
}

static int parse_max_pkt_len(const char *pktlen)
{
	char *end = NULL;
	unsigned long len;

	/* parse decimal string */
	len = strtoul(pktlen, &end, 10);
	if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (len == 0)
		return -1;

	return len;
}

static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	return pm;
}

static int
parse_config(const char *q_arg)
{
	char s[256];
	const char *p, *p0 = q_arg;
	char *end;
	enum fieldnames {
		FLD_PORT = 0,
		FLD_QUEUE,
		FLD_LCORE,
		_NUM_FLD
	};
	unsigned long int_fld[_NUM_FLD];
	char *str_fld[_NUM_FLD];
	int i;
	unsigned size;

	nb_lcore_params = 0;

	while ((p = strchr(p0, '(')) != NULL) {
		++p;
		if ((p0 = strchr(p, ')')) == NULL)
			return -1;

		size = p0 - p;
		if (size >= sizeof(s))
			return -1;

		snprintf(s, sizeof(s), "%.*s", size, p);
		if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
								_NUM_FLD)
			return -1;
		for (i = 0; i < _NUM_FLD; i++) {
			errno = 0;
			int_fld[i] = strtoul(str_fld[i], &end, 0);
			if (errno != 0 || end == str_fld[i] || int_fld[i] >
									255)
				return -1;
		}
		if (nb_lcore_params >= MAX_LCORE_PARAMS) {
			printf("exceeded max number of lcore params: %hu\n",
				nb_lcore_params);
			return -1;
		}
		lcore_params_array[nb_lcore_params].port_id =
				(uint8_t)int_fld[FLD_PORT];
		lcore_params_array[nb_lcore_params].queue_id =
				(uint8_t)int_fld[FLD_QUEUE];
		lcore_params_array[nb_lcore_params].lcore_id =
				(uint8_t)int_fld[FLD_LCORE];
		++nb_lcore_params;
	}
	lcore_params = lcore_params_array;

	return 0;
}
static int
parse_ep_config(const char *q_arg)
{
	char s[256];
	const char *p = q_arg;
	char *end;
	int num_arg;

	char *str_fld[3];

	int training_flag;
	int med_edpi;
	int hgh_edpi;

	ep_med_edpi = EMPTY_POLL_MED_THRESHOLD;
	ep_hgh_edpi = EMPTY_POLL_HGH_THRESHOLD;

	strlcpy(s, p, sizeof(s));

	num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ',');

	empty_poll_train = false;

	if (num_arg == 0)
		return 0;

	if (num_arg == 3) {

		training_flag = strtoul(str_fld[0], &end, 0);
		med_edpi = strtoul(str_fld[1], &end, 0);
		hgh_edpi = strtoul(str_fld[2], &end, 0);

		if (training_flag == 1)
			empty_poll_train = true;

		if (med_edpi > 0)
			ep_med_edpi = med_edpi;

		if (hgh_edpi > 0)
ep_hgh_edpi = hgh_edpi; 1744 1745 } else { 1746 1747 return -1; 1748 } 1749 1750 return 0; 1751 1752 } 1753 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" 1754 #define CMD_LINE_OPT_LEGACY "legacy" 1755 #define CMD_LINE_OPT_EMPTY_POLL "empty-poll" 1756 #define CMD_LINE_OPT_INTERRUPT_ONLY "interrupt-only" 1757 #define CMD_LINE_OPT_TELEMETRY "telemetry" 1758 1759 /* Parse the argument given in the command line of the application */ 1760 static int 1761 parse_args(int argc, char **argv) 1762 { 1763 int opt, ret; 1764 char **argvopt; 1765 int option_index; 1766 uint32_t limit; 1767 char *prgname = argv[0]; 1768 static struct option lgopts[] = { 1769 {"config", 1, 0, 0}, 1770 {"perf-config", 1, 0, 0}, 1771 {"high-perf-cores", 1, 0, 0}, 1772 {"no-numa", 0, 0, 0}, 1773 {"enable-jumbo", 0, 0, 0}, 1774 {CMD_LINE_OPT_EMPTY_POLL, 1, 0, 0}, 1775 {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, 1776 {CMD_LINE_OPT_LEGACY, 0, 0, 0}, 1777 {CMD_LINE_OPT_TELEMETRY, 0, 0, 0}, 1778 {CMD_LINE_OPT_INTERRUPT_ONLY, 0, 0, 0}, 1779 {NULL, 0, 0, 0} 1780 }; 1781 1782 argvopt = argv; 1783 1784 while ((opt = getopt_long(argc, argvopt, "p:l:m:h:P", 1785 lgopts, &option_index)) != EOF) { 1786 1787 switch (opt) { 1788 /* portmask */ 1789 case 'p': 1790 enabled_port_mask = parse_portmask(optarg); 1791 if (enabled_port_mask == 0) { 1792 printf("invalid portmask\n"); 1793 print_usage(prgname); 1794 return -1; 1795 } 1796 break; 1797 case 'P': 1798 printf("Promiscuous mode selected\n"); 1799 promiscuous_on = 1; 1800 break; 1801 case 'l': 1802 limit = parse_max_pkt_len(optarg); 1803 freq_tlb[LOW] = limit; 1804 break; 1805 case 'm': 1806 limit = parse_max_pkt_len(optarg); 1807 freq_tlb[MED] = limit; 1808 break; 1809 case 'h': 1810 limit = parse_max_pkt_len(optarg); 1811 freq_tlb[HGH] = limit; 1812 break; 1813 /* long options */ 1814 case 0: 1815 if (!strncmp(lgopts[option_index].name, "config", 6)) { 1816 ret = parse_config(optarg); 1817 if (ret) { 1818 printf("invalid config\n"); 1819 print_usage(prgname); 1820 return -1; 1821 } 1822 } 1823 1824 if (!strncmp(lgopts[option_index].name, 1825 "perf-config", 11)) { 1826 ret = parse_perf_config(optarg); 1827 if (ret) { 1828 printf("invalid perf-config\n"); 1829 print_usage(prgname); 1830 return -1; 1831 } 1832 } 1833 1834 if (!strncmp(lgopts[option_index].name, 1835 "high-perf-cores", 15)) { 1836 ret = parse_perf_core_list(optarg); 1837 if (ret) { 1838 printf("invalid high-perf-cores\n"); 1839 print_usage(prgname); 1840 return -1; 1841 } 1842 } 1843 1844 if (!strncmp(lgopts[option_index].name, 1845 "no-numa", 7)) { 1846 printf("numa is disabled \n"); 1847 numa_on = 0; 1848 } 1849 1850 if (!strncmp(lgopts[option_index].name, 1851 CMD_LINE_OPT_LEGACY, 1852 sizeof(CMD_LINE_OPT_LEGACY))) { 1853 if (app_mode != APP_MODE_DEFAULT) { 1854 printf(" legacy mode is mutually exclusive with other modes\n"); 1855 return -1; 1856 } 1857 app_mode = APP_MODE_LEGACY; 1858 printf("legacy mode is enabled\n"); 1859 } 1860 1861 if (!strncmp(lgopts[option_index].name, 1862 CMD_LINE_OPT_EMPTY_POLL, 10)) { 1863 if (app_mode != APP_MODE_DEFAULT) { 1864 printf(" empty-poll mode is mutually exclusive with other modes\n"); 1865 return -1; 1866 } 1867 app_mode = APP_MODE_EMPTY_POLL; 1868 ret = parse_ep_config(optarg); 1869 1870 if (ret) { 1871 printf("invalid empty poll config\n"); 1872 print_usage(prgname); 1873 return -1; 1874 } 1875 printf("empty-poll is enabled\n"); 1876 } 1877 1878 if (!strncmp(lgopts[option_index].name, 1879 CMD_LINE_OPT_TELEMETRY, 1880 sizeof(CMD_LINE_OPT_TELEMETRY))) { 1881 if (app_mode != 
APP_MODE_DEFAULT) { 1882 printf(" telemetry mode is mutually exclusive with other modes\n"); 1883 return -1; 1884 } 1885 app_mode = APP_MODE_TELEMETRY; 1886 printf("telemetry mode is enabled\n"); 1887 } 1888 1889 if (!strncmp(lgopts[option_index].name, 1890 CMD_LINE_OPT_INTERRUPT_ONLY, 1891 sizeof(CMD_LINE_OPT_INTERRUPT_ONLY))) { 1892 if (app_mode != APP_MODE_DEFAULT) { 1893 printf(" interrupt-only mode is mutually exclusive with other modes\n"); 1894 return -1; 1895 } 1896 app_mode = APP_MODE_INTERRUPT; 1897 printf("interrupt-only mode is enabled\n"); 1898 } 1899 1900 if (!strncmp(lgopts[option_index].name, 1901 "enable-jumbo", 12)) { 1902 struct option lenopts = 1903 {"max-pkt-len", required_argument, \ 1904 0, 0}; 1905 1906 printf("jumbo frame is enabled \n"); 1907 port_conf.rxmode.offloads |= 1908 DEV_RX_OFFLOAD_JUMBO_FRAME; 1909 port_conf.txmode.offloads |= 1910 DEV_TX_OFFLOAD_MULTI_SEGS; 1911 1912 /** 1913 * if no max-pkt-len set, use the default value 1914 * RTE_ETHER_MAX_LEN 1915 */ 1916 if (0 == getopt_long(argc, argvopt, "", 1917 &lenopts, &option_index)) { 1918 ret = parse_max_pkt_len(optarg); 1919 if ((ret < 64) || 1920 (ret > MAX_JUMBO_PKT_LEN)){ 1921 printf("invalid packet " 1922 "length\n"); 1923 print_usage(prgname); 1924 return -1; 1925 } 1926 port_conf.rxmode.max_rx_pkt_len = ret; 1927 } 1928 printf("set jumbo frame " 1929 "max packet length to %u\n", 1930 (unsigned int)port_conf.rxmode.max_rx_pkt_len); 1931 } 1932 1933 if (!strncmp(lgopts[option_index].name, 1934 CMD_LINE_OPT_PARSE_PTYPE, 1935 sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { 1936 printf("soft parse-ptype is enabled\n"); 1937 parse_ptype = 1; 1938 } 1939 1940 break; 1941 1942 default: 1943 print_usage(prgname); 1944 return -1; 1945 } 1946 } 1947 1948 if (optind >= 0) 1949 argv[optind-1] = prgname; 1950 1951 ret = optind-1; 1952 optind = 1; /* reset getopt lib */ 1953 return ret; 1954 } 1955 1956 static void 1957 print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr) 1958 { 1959 char buf[RTE_ETHER_ADDR_FMT_SIZE]; 1960 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr); 1961 printf("%s%s", name, buf); 1962 } 1963 1964 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1965 static void 1966 setup_hash(int socketid) 1967 { 1968 struct rte_hash_parameters ipv4_l3fwd_hash_params = { 1969 .name = NULL, 1970 .entries = L3FWD_HASH_ENTRIES, 1971 .key_len = sizeof(struct ipv4_5tuple), 1972 .hash_func = DEFAULT_HASH_FUNC, 1973 .hash_func_init_val = 0, 1974 }; 1975 1976 struct rte_hash_parameters ipv6_l3fwd_hash_params = { 1977 .name = NULL, 1978 .entries = L3FWD_HASH_ENTRIES, 1979 .key_len = sizeof(struct ipv6_5tuple), 1980 .hash_func = DEFAULT_HASH_FUNC, 1981 .hash_func_init_val = 0, 1982 }; 1983 1984 unsigned i; 1985 int ret; 1986 char s[64]; 1987 1988 /* create ipv4 hash */ 1989 snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); 1990 ipv4_l3fwd_hash_params.name = s; 1991 ipv4_l3fwd_hash_params.socket_id = socketid; 1992 ipv4_l3fwd_lookup_struct[socketid] = 1993 rte_hash_create(&ipv4_l3fwd_hash_params); 1994 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 1995 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 1996 "socket %d\n", socketid); 1997 1998 /* create ipv6 hash */ 1999 snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); 2000 ipv6_l3fwd_hash_params.name = s; 2001 ipv6_l3fwd_hash_params.socket_id = socketid; 2002 ipv6_l3fwd_lookup_struct[socketid] = 2003 rte_hash_create(&ipv6_l3fwd_hash_params); 2004 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 2005 rte_exit(EXIT_FAILURE, 
"Unable to create the l3fwd hash on " 2006 "socket %d\n", socketid); 2007 2008 2009 /* populate the ipv4 hash */ 2010 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 2011 ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid], 2012 (void *) &ipv4_l3fwd_route_array[i].key); 2013 if (ret < 0) { 2014 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 2015 "l3fwd hash on socket %d\n", i, socketid); 2016 } 2017 ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out; 2018 printf("Hash: Adding key\n"); 2019 print_ipv4_key(ipv4_l3fwd_route_array[i].key); 2020 } 2021 2022 /* populate the ipv6 hash */ 2023 for (i = 0; i < RTE_DIM(ipv6_l3fwd_route_array); i++) { 2024 ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid], 2025 (void *) &ipv6_l3fwd_route_array[i].key); 2026 if (ret < 0) { 2027 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 2028 "l3fwd hash on socket %d\n", i, socketid); 2029 } 2030 ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out; 2031 printf("Hash: Adding key\n"); 2032 print_ipv6_key(ipv6_l3fwd_route_array[i].key); 2033 } 2034 } 2035 #endif 2036 2037 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 2038 static void 2039 setup_lpm(int socketid) 2040 { 2041 unsigned i; 2042 int ret; 2043 char s[64]; 2044 2045 /* create the LPM table */ 2046 struct rte_lpm_config lpm_ipv4_config; 2047 2048 lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; 2049 lpm_ipv4_config.number_tbl8s = 256; 2050 lpm_ipv4_config.flags = 0; 2051 2052 snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); 2053 ipv4_l3fwd_lookup_struct[socketid] = 2054 rte_lpm_create(s, socketid, &lpm_ipv4_config); 2055 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 2056 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 2057 " on socket %d\n", socketid); 2058 2059 /* populate the LPM table */ 2060 for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) { 2061 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], 2062 ipv4_l3fwd_route_array[i].ip, 2063 ipv4_l3fwd_route_array[i].depth, 2064 ipv4_l3fwd_route_array[i].if_out); 2065 2066 if (ret < 0) { 2067 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 2068 "l3fwd LPM table on socket %d\n", 2069 i, socketid); 2070 } 2071 2072 printf("LPM: Adding route 0x%08x / %d (%d)\n", 2073 (unsigned)ipv4_l3fwd_route_array[i].ip, 2074 ipv4_l3fwd_route_array[i].depth, 2075 ipv4_l3fwd_route_array[i].if_out); 2076 } 2077 } 2078 #endif 2079 2080 static int 2081 init_mem(unsigned nb_mbuf) 2082 { 2083 struct lcore_conf *qconf; 2084 int socketid; 2085 unsigned lcore_id; 2086 char s[64]; 2087 2088 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2089 if (rte_lcore_is_enabled(lcore_id) == 0) 2090 continue; 2091 2092 if (numa_on) 2093 socketid = rte_lcore_to_socket_id(lcore_id); 2094 else 2095 socketid = 0; 2096 2097 if (socketid >= NB_SOCKETS) { 2098 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is " 2099 "out of range %d\n", socketid, 2100 lcore_id, NB_SOCKETS); 2101 } 2102 if (pktmbuf_pool[socketid] == NULL) { 2103 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 2104 pktmbuf_pool[socketid] = 2105 rte_pktmbuf_pool_create(s, nb_mbuf, 2106 MEMPOOL_CACHE_SIZE, 0, 2107 RTE_MBUF_DEFAULT_BUF_SIZE, 2108 socketid); 2109 if (pktmbuf_pool[socketid] == NULL) 2110 rte_exit(EXIT_FAILURE, 2111 "Cannot init mbuf pool on socket %d\n", 2112 socketid); 2113 else 2114 printf("Allocated mbuf pool on socket %d\n", 2115 socketid); 2116 2117 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 2118 setup_lpm(socketid); 2119 #else 2120 setup_hash(socketid); 2121 #endif 2122 } 
2123 qconf = &lcore_conf[lcore_id]; 2124 qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid]; 2125 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2126 qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid]; 2127 #endif 2128 } 2129 return 0; 2130 } 2131 2132 /* Check the link status of all ports in up to 9s, and print them finally */ 2133 static void 2134 check_all_ports_link_status(uint32_t port_mask) 2135 { 2136 #define CHECK_INTERVAL 100 /* 100ms */ 2137 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 2138 uint8_t count, all_ports_up, print_flag = 0; 2139 uint16_t portid; 2140 struct rte_eth_link link; 2141 int ret; 2142 char link_status_text[RTE_ETH_LINK_MAX_STR_LEN]; 2143 2144 printf("\nChecking link status"); 2145 fflush(stdout); 2146 for (count = 0; count <= MAX_CHECK_TIME; count++) { 2147 all_ports_up = 1; 2148 RTE_ETH_FOREACH_DEV(portid) { 2149 if ((port_mask & (1 << portid)) == 0) 2150 continue; 2151 memset(&link, 0, sizeof(link)); 2152 ret = rte_eth_link_get_nowait(portid, &link); 2153 if (ret < 0) { 2154 all_ports_up = 0; 2155 if (print_flag == 1) 2156 printf("Port %u link get failed: %s\n", 2157 portid, rte_strerror(-ret)); 2158 continue; 2159 } 2160 /* print link status if flag set */ 2161 if (print_flag == 1) { 2162 rte_eth_link_to_str(link_status_text, 2163 sizeof(link_status_text), &link); 2164 printf("Port %d %s\n", portid, 2165 link_status_text); 2166 continue; 2167 } 2168 /* clear all_ports_up flag if any link down */ 2169 if (link.link_status == ETH_LINK_DOWN) { 2170 all_ports_up = 0; 2171 break; 2172 } 2173 } 2174 /* after finally printing all link status, get out */ 2175 if (print_flag == 1) 2176 break; 2177 2178 if (all_ports_up == 0) { 2179 printf("."); 2180 fflush(stdout); 2181 rte_delay_ms(CHECK_INTERVAL); 2182 } 2183 2184 /* set the print_flag if all ports up or timeout */ 2185 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 2186 print_flag = 1; 2187 printf("done\n"); 2188 } 2189 } 2190 } 2191 2192 static int check_ptype(uint16_t portid) 2193 { 2194 int i, ret; 2195 int ptype_l3_ipv4 = 0; 2196 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2197 int ptype_l3_ipv6 = 0; 2198 #endif 2199 uint32_t ptype_mask = RTE_PTYPE_L3_MASK; 2200 2201 ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0); 2202 if (ret <= 0) 2203 return 0; 2204 2205 uint32_t ptypes[ret]; 2206 2207 ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret); 2208 for (i = 0; i < ret; ++i) { 2209 if (ptypes[i] & RTE_PTYPE_L3_IPV4) 2210 ptype_l3_ipv4 = 1; 2211 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2212 if (ptypes[i] & RTE_PTYPE_L3_IPV6) 2213 ptype_l3_ipv6 = 1; 2214 #endif 2215 } 2216 2217 if (ptype_l3_ipv4 == 0) 2218 printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid); 2219 2220 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 2221 if (ptype_l3_ipv6 == 0) 2222 printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid); 2223 #endif 2224 2225 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 2226 if (ptype_l3_ipv4) 2227 #else /* APP_LOOKUP_EXACT_MATCH */ 2228 if (ptype_l3_ipv4 && ptype_l3_ipv6) 2229 #endif 2230 return 1; 2231 2232 return 0; 2233 2234 } 2235 2236 static int 2237 init_power_library(void) 2238 { 2239 enum power_management_env env; 2240 unsigned int lcore_id; 2241 int ret = 0; 2242 2243 RTE_LCORE_FOREACH(lcore_id) { 2244 /* init power management library */ 2245 ret = rte_power_init(lcore_id); 2246 if (ret) { 2247 RTE_LOG(ERR, POWER, 2248 "Library initialization failed on core %u\n", 2249 lcore_id); 2250 return 

/* Check the link status of all ports in up to 9s, and print them finally */
static void
check_all_ports_link_status(uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
	uint8_t count, all_ports_up, print_flag = 0;
	uint16_t portid;
	struct rte_eth_link link;
	int ret;
	char link_status_text[RTE_ETH_LINK_MAX_STR_LEN];

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		RTE_ETH_FOREACH_DEV(portid) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			ret = rte_eth_link_get_nowait(portid, &link);
			if (ret < 0) {
				all_ports_up = 0;
				if (print_flag == 1)
					printf("Port %u link get failed: %s\n",
						portid, rte_strerror(-ret));
				continue;
			}
			/* print link status if flag set */
			if (print_flag == 1) {
				rte_eth_link_to_str(link_status_text,
					sizeof(link_status_text), &link);
				printf("Port %d %s\n", portid,
					link_status_text);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == ETH_LINK_DOWN) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}

static int check_ptype(uint16_t portid)
{
	int i, ret;
	int ptype_l3_ipv4 = 0;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	int ptype_l3_ipv6 = 0;
#endif
	uint32_t ptype_mask = RTE_PTYPE_L3_MASK;

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
	if (ret <= 0)
		return 0;

	uint32_t ptypes[ret];

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
	for (i = 0; i < ret; ++i) {
		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
			ptype_l3_ipv4 = 1;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
			ptype_l3_ipv6 = 1;
#endif
	}

	if (ptype_l3_ipv4 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	if (ptype_l3_ipv6 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
	if (ptype_l3_ipv4)
#else /* APP_LOOKUP_EXACT_MATCH */
	if (ptype_l3_ipv4 && ptype_l3_ipv6)
#endif
		return 1;

	return 0;
}

static int
init_power_library(void)
{
	enum power_management_env env;
	unsigned int lcore_id;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* init power management library */
		ret = rte_power_init(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library initialization failed on core %u\n",
				lcore_id);
			return ret;
		}
		/* we're not supporting the VM channel mode */
		env = rte_power_get_env();
		if (env != PM_ENV_ACPI_CPUFREQ &&
				env != PM_ENV_PSTATE_CPUFREQ) {
			RTE_LOG(ERR, POWER,
				"Only ACPI and PSTATE modes are supported\n");
			return -1;
		}
	}
	return ret;
}

static int
deinit_power_library(void)
{
	unsigned int lcore_id;
	int ret = 0;

	RTE_LCORE_FOREACH(lcore_id) {
		/* deinit power management library */
		ret = rte_power_exit(lcore_id);
		if (ret) {
			RTE_LOG(ERR, POWER,
				"Library deinitialization failed on core %u\n",
				lcore_id);
			return ret;
		}
	}
	return ret;
}
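
/*
 * Added note (not in the original sources): rte_power_init() probes the
 * available frequency-scaling backend (acpi-cpufreq or intel_pstate sysfs
 * interfaces) for each lcore; in legacy mode the worker loops then scale
 * each core with calls such as rte_power_freq_up()/rte_power_freq_down()
 * based on the RX activity observed while polling.
 */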

static void
get_current_stat_values(uint64_t *values)
{
	unsigned int lcore_id = rte_lcore_id();
	struct lcore_conf *qconf;
	uint64_t app_eps = 0, app_fps = 0, app_br = 0;
	uint64_t count = 0;

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		qconf = &lcore_conf[lcore_id];
		if (qconf->n_rx_queue == 0)
			continue;
		count++;
		rte_spinlock_lock(&stats[lcore_id].telemetry_lock);
		app_eps += stats[lcore_id].ep_nep[1];
		app_fps += stats[lcore_id].fp_nfp[1];
		app_br += stats[lcore_id].br;
		rte_spinlock_unlock(&stats[lcore_id].telemetry_lock);
	}

	if (count > 0) {
		values[0] = app_eps/count;
		values[1] = app_fps/count;
		values[2] = app_br/count;
	} else
		memset(values, 0, sizeof(uint64_t) * NUM_TELSTATS);
}

static void
update_telemetry(__rte_unused struct rte_timer *tim,
		__rte_unused void *arg)
{
	int ret;
	uint64_t values[NUM_TELSTATS] = {0};

	get_current_stat_values(values);
	ret = rte_metrics_update_values(RTE_METRICS_GLOBAL, telstats_index,
					values, RTE_DIM(values));
	if (ret < 0)
		RTE_LOG(WARNING, POWER, "failed to update metrics\n");
}

static int
handle_app_stats(const char *cmd __rte_unused,
		const char *params __rte_unused,
		struct rte_tel_data *d)
{
	uint64_t values[NUM_TELSTATS] = {0};
	uint32_t i;

	rte_tel_data_start_dict(d);
	get_current_stat_values(values);
	for (i = 0; i < NUM_TELSTATS; i++)
		rte_tel_data_add_dict_u64(d, telstats_strings[i].name,
				values[i]);
	return 0;
}

static void
telemetry_setup_timer(void)
{
	int lcore_id = rte_lcore_id();
	uint64_t hz = rte_get_timer_hz();
	uint64_t ticks;

	ticks = hz / TELEMETRY_INTERVALS_PER_SEC;
	rte_timer_reset_sync(&telemetry_timer,
			ticks,
			PERIODICAL,
			lcore_id,
			update_telemetry,
			NULL);
}

static void
empty_poll_setup_timer(void)
{
	int lcore_id = rte_lcore_id();
	uint64_t hz = rte_get_timer_hz();

	struct ep_params *ep_ptr = ep_params;

	ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND;

	rte_timer_reset_sync(&ep_ptr->timer0,
			ep_ptr->interval_ticks,
			PERIODICAL,
			lcore_id,
			rte_empty_poll_detection,
			(void *)ep_ptr);
}

static int
launch_timer(unsigned int lcore_id)
{
	int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms;

	RTE_SET_USED(lcore_id);

	if (rte_get_main_lcore() != lcore_id) {
		rte_panic("timer on lcore:%d which is not main core:%d\n",
				lcore_id,
				rte_get_main_lcore());
	}

	RTE_LOG(INFO, POWER, "Bring up the Timer\n");

	if (app_mode == APP_MODE_EMPTY_POLL)
		empty_poll_setup_timer();
	else
		telemetry_setup_timer();

	cycles_10ms = rte_get_timer_hz() / 100;

	while (!is_done()) {
		cur_tsc = rte_rdtsc();
		diff_tsc = cur_tsc - prev_tsc;
		if (diff_tsc > cycles_10ms) {
			rte_timer_manage();
			prev_tsc = cur_tsc;
			cycles_10ms = rte_get_timer_hz() / 100;
		}
	}

	RTE_LOG(INFO, POWER, "Timer_subsystem is done\n");

	return 0;
}

static int
autodetect_mode(void)
{
	RTE_LOG(NOTICE, L3FWD_POWER, "Operating mode not specified, probing frequency scaling support...\n");

	/*
	 * Empty poll and telemetry modes have to be specifically requested to
	 * be enabled, but we can auto-detect between interrupt mode with or
	 * without frequency scaling. Both ACPI and pstate can be used.
	 */
	if (rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ))
		return APP_MODE_LEGACY;
	if (rte_power_check_env_supported(PM_ENV_PSTATE_CPUFREQ))
		return APP_MODE_LEGACY;

	RTE_LOG(NOTICE, L3FWD_POWER, "Frequency scaling not supported, selecting interrupt-only mode\n");

	return APP_MODE_INTERRUPT;
}

static const char *
mode_to_str(enum appmode mode)
{
	switch (mode) {
	case APP_MODE_LEGACY:
		return "legacy";
	case APP_MODE_EMPTY_POLL:
		return "empty poll";
	case APP_MODE_TELEMETRY:
		return "telemetry";
	case APP_MODE_INTERRUPT:
		return "interrupt-only";
	default:
		return "invalid";
	}
}
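
/*
 * Added usage note (not in the original sources): in telemetry mode the
 * values registered in main() below are exposed both through the metrics
 * library and through the telemetry command registered as
 * "/l3fwd-power/stats", so they can be read at runtime with a telemetry
 * client such as usertools/dpdk-telemetry.py, e.g.:
 *
 *	--> /l3fwd-power/stats
 *	{"/l3fwd-power/stats": {"empty_poll": 0, "full_poll": 0,
 *		"busy_percent": 0}}
 *
 * (the output shape shown is illustrative).
 */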

int
main(int argc, char **argv)
{
	struct lcore_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	int ret;
	uint16_t nb_ports;
	uint16_t queueid;
	unsigned lcore_id;
	uint64_t hz;
	uint32_t n_tx_queue, nb_lcores;
	uint32_t dev_rxq_num, dev_txq_num;
	uint8_t nb_rx_queue, queue, socketid;
	uint16_t portid;
	const char *ptr_strings[NUM_TELSTATS];

	/* catch SIGINT and restore cpufreq governor to ondemand */
	signal(SIGINT, signal_exit_now);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
	argc -= ret;
	argv += ret;

	/* init RTE timer library to be used later */
	rte_timer_subsystem_init();

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");

	if (app_mode == APP_MODE_DEFAULT)
		app_mode = autodetect_mode();

	RTE_LOG(INFO, L3FWD_POWER, "Selected operation mode: %s\n",
			mode_to_str(app_mode));

	/* only legacy and empty poll mode rely on power library */
	if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) &&
			init_power_library())
		rte_exit(EXIT_FAILURE, "init_power_library failed\n");

	if (update_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "update_lcore_params failed\n");

	if (check_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");

	ret = init_lcore_rx_queues();
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");

	nb_ports = rte_eth_dev_count_avail();

	if (check_port_config() < 0)
		rte_exit(EXIT_FAILURE, "check_port_config failed\n");

	nb_lcores = rte_lcore_count();

	/* initialize all ports */
	RTE_ETH_FOREACH_DEV(portid) {
		struct rte_eth_conf local_port_conf = port_conf;
		/* not all app modes need interrupts */
		bool need_intr = app_mode == APP_MODE_LEGACY ||
				app_mode == APP_MODE_INTERRUPT;

		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}

		/* init port */
		printf("Initializing port %d ... ", portid);
		fflush(stdout);

		ret = rte_eth_dev_info_get(portid, &dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				"Error during getting device (port %u) info: %s\n",
				portid, strerror(-ret));

		dev_rxq_num = dev_info.max_rx_queues;
		dev_txq_num = dev_info.max_tx_queues;

		nb_rx_queue = get_port_n_rx_queues(portid);
		if (nb_rx_queue > dev_rxq_num)
			rte_exit(EXIT_FAILURE,
				"Cannot configure non-existent rxq: "
				"port=%d\n", portid);

		n_tx_queue = nb_lcores;
		if (n_tx_queue > dev_txq_num)
			n_tx_queue = dev_txq_num;
		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
			nb_rx_queue, (unsigned)n_tx_queue);
		/* If number of Rx queues is 0, no need to enable Rx interrupt */
		if (nb_rx_queue == 0)
			need_intr = false;

		if (need_intr)
			local_port_conf.intr_conf.rxq = 1;

		ret = rte_eth_dev_info_get(portid, &dev_info);
		if (ret != 0)
			rte_exit(EXIT_FAILURE,
				"Error during getting device (port %u) info: %s\n",
				portid, strerror(-ret));

		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
			local_port_conf.txmode.offloads |=
				DEV_TX_OFFLOAD_MBUF_FAST_FREE;

		local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
			dev_info.flow_type_rss_offloads;
		if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
				port_conf.rx_adv_conf.rss_conf.rss_hf) {
			printf("Port %u modified RSS hash function based on hardware support, "
				"requested:%#"PRIx64" configured:%#"PRIx64"\n",
				portid,
				port_conf.rx_adv_conf.rss_conf.rss_hf,
				local_port_conf.rx_adv_conf.rss_conf.rss_hf);
		}

		ret = rte_eth_dev_configure(portid, nb_rx_queue,
					(uint16_t)n_tx_queue, &local_port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: "
					"err=%d, port=%d\n", ret, portid);

		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						       &nb_txd);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				"Cannot adjust number of descriptors: err=%d, port=%d\n",
				ret, portid);

		ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				"Cannot get MAC address: err=%d, port=%d\n",
				ret, portid);

		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf(", ");

		/* init memory */
		ret = init_mem(NB_MBUF);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "init_mem failed\n");

		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			/* Initialize TX buffers */
			qconf = &lcore_conf[lcore_id];
			qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
				rte_eth_dev_socket_id(portid));
			if (qconf->tx_buffer[portid] == NULL)
				rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n",
						portid);

			rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST);
		}

		/* init one TX queue per couple (lcore,port) */
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			if (queueid >= dev_txq_num)
				continue;

			if (numa_on)
				socketid =
					(uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
			fflush(stdout);

			txconf = &dev_info.default_txconf;
			txconf->offloads = local_port_conf.txmode.offloads;
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
						     socketid, txconf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_tx_queue_setup: err=%d, "
					"port=%d\n", ret, portid);

			qconf = &lcore_conf[lcore_id];
			qconf->tx_queue_id[portid] = queueid;
			queueid++;

			qconf->tx_port_id[qconf->n_tx_port] = portid;
			qconf->n_tx_port++;
		}
		printf("\n");
	}
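
	/*
	 * Added note (not in the original sources): at this point each
	 * enabled lcore owns one TX queue per port plus an rte_eth_tx_buffer;
	 * on the datapath, packets are queued with rte_eth_tx_buffer() and
	 * drained periodically with rte_eth_tx_buffer_flush() in the main
	 * loops earlier in this file. Likewise, where intr_conf.rxq was
	 * enabled above, the worker loops may arm RX interrupts with
	 * rte_eth_dev_rx_intr_enable() and block in rte_epoll_wait() during
	 * idle periods instead of busy polling.
	 */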
lcore_id < RTE_MAX_LCORE; lcore_id++) { 2624 if (rte_lcore_is_enabled(lcore_id) == 0) 2625 continue; 2626 2627 if (queueid >= dev_txq_num) 2628 continue; 2629 2630 if (numa_on) 2631 socketid = \ 2632 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2633 else 2634 socketid = 0; 2635 2636 printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); 2637 fflush(stdout); 2638 2639 txconf = &dev_info.default_txconf; 2640 txconf->offloads = local_port_conf.txmode.offloads; 2641 ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, 2642 socketid, txconf); 2643 if (ret < 0) 2644 rte_exit(EXIT_FAILURE, 2645 "rte_eth_tx_queue_setup: err=%d, " 2646 "port=%d\n", ret, portid); 2647 2648 qconf = &lcore_conf[lcore_id]; 2649 qconf->tx_queue_id[portid] = queueid; 2650 queueid++; 2651 2652 qconf->tx_port_id[qconf->n_tx_port] = portid; 2653 qconf->n_tx_port++; 2654 } 2655 printf("\n"); 2656 } 2657 2658 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2659 if (rte_lcore_is_enabled(lcore_id) == 0) 2660 continue; 2661 2662 if (app_mode == APP_MODE_LEGACY) { 2663 /* init timer structures for each enabled lcore */ 2664 rte_timer_init(&power_timers[lcore_id]); 2665 hz = rte_get_timer_hz(); 2666 rte_timer_reset(&power_timers[lcore_id], 2667 hz/TIMER_NUMBER_PER_SECOND, 2668 SINGLE, lcore_id, 2669 power_timer_cb, NULL); 2670 } 2671 qconf = &lcore_conf[lcore_id]; 2672 printf("\nInitializing rx queues on lcore %u ... ", lcore_id ); 2673 fflush(stdout); 2674 /* init RX queues */ 2675 for(queue = 0; queue < qconf->n_rx_queue; ++queue) { 2676 struct rte_eth_rxconf rxq_conf; 2677 2678 portid = qconf->rx_queue_list[queue].port_id; 2679 queueid = qconf->rx_queue_list[queue].queue_id; 2680 2681 if (numa_on) 2682 socketid = \ 2683 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2684 else 2685 socketid = 0; 2686 2687 printf("rxq=%d,%d,%d ", portid, queueid, socketid); 2688 fflush(stdout); 2689 2690 ret = rte_eth_dev_info_get(portid, &dev_info); 2691 if (ret != 0) 2692 rte_exit(EXIT_FAILURE, 2693 "Error during getting device (port %u) info: %s\n", 2694 portid, strerror(-ret)); 2695 2696 rxq_conf = dev_info.default_rxconf; 2697 rxq_conf.offloads = port_conf.rxmode.offloads; 2698 ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, 2699 socketid, &rxq_conf, 2700 pktmbuf_pool[socketid]); 2701 if (ret < 0) 2702 rte_exit(EXIT_FAILURE, 2703 "rte_eth_rx_queue_setup: err=%d, " 2704 "port=%d\n", ret, portid); 2705 2706 if (parse_ptype) { 2707 if (add_cb_parse_ptype(portid, queueid) < 0) 2708 rte_exit(EXIT_FAILURE, 2709 "Fail to add ptype cb\n"); 2710 } else if (!check_ptype(portid)) 2711 rte_exit(EXIT_FAILURE, 2712 "PMD can not provide needed ptypes\n"); 2713 } 2714 } 2715 2716 printf("\n"); 2717 2718 /* start ports */ 2719 RTE_ETH_FOREACH_DEV(portid) { 2720 if ((enabled_port_mask & (1 << portid)) == 0) { 2721 continue; 2722 } 2723 /* Start device */ 2724 ret = rte_eth_dev_start(portid); 2725 if (ret < 0) 2726 rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, " 2727 "port=%d\n", ret, portid); 2728 /* 2729 * If enabled, put device in promiscuous mode. 2730 * This allows IO forwarding mode to forward packets 2731 * to itself through 2 cross-connected ports of the 2732 * target machine. 
2733 */ 2734 if (promiscuous_on) { 2735 ret = rte_eth_promiscuous_enable(portid); 2736 if (ret != 0) 2737 rte_exit(EXIT_FAILURE, 2738 "rte_eth_promiscuous_enable: err=%s, port=%u\n", 2739 rte_strerror(-ret), portid); 2740 } 2741 /* initialize spinlock for each port */ 2742 rte_spinlock_init(&(locks[portid])); 2743 } 2744 2745 check_all_ports_link_status(enabled_port_mask); 2746 2747 if (app_mode == APP_MODE_EMPTY_POLL) { 2748 2749 if (empty_poll_train) { 2750 policy.state = TRAINING; 2751 } else { 2752 policy.state = MED_NORMAL; 2753 policy.med_base_edpi = ep_med_edpi; 2754 policy.hgh_base_edpi = ep_hgh_edpi; 2755 } 2756 2757 ret = rte_power_empty_poll_stat_init(&ep_params, 2758 freq_tlb, 2759 &policy); 2760 if (ret < 0) 2761 rte_exit(EXIT_FAILURE, "empty poll init failed"); 2762 } 2763 2764 2765 /* launch per-lcore init on every lcore */ 2766 if (app_mode == APP_MODE_LEGACY) { 2767 rte_eal_mp_remote_launch(main_legacy_loop, NULL, CALL_MAIN); 2768 } else if (app_mode == APP_MODE_EMPTY_POLL) { 2769 empty_poll_stop = false; 2770 rte_eal_mp_remote_launch(main_empty_poll_loop, NULL, 2771 SKIP_MAIN); 2772 } else if (app_mode == APP_MODE_TELEMETRY) { 2773 unsigned int i; 2774 2775 /* Init metrics library */ 2776 rte_metrics_init(rte_socket_id()); 2777 /** Register stats with metrics library */ 2778 for (i = 0; i < NUM_TELSTATS; i++) 2779 ptr_strings[i] = telstats_strings[i].name; 2780 2781 ret = rte_metrics_reg_names(ptr_strings, NUM_TELSTATS); 2782 if (ret >= 0) 2783 telstats_index = ret; 2784 else 2785 rte_exit(EXIT_FAILURE, "failed to register metrics names"); 2786 2787 RTE_LCORE_FOREACH_WORKER(lcore_id) { 2788 rte_spinlock_init(&stats[lcore_id].telemetry_lock); 2789 } 2790 rte_timer_init(&telemetry_timer); 2791 rte_telemetry_register_cmd("/l3fwd-power/stats", 2792 handle_app_stats, 2793 "Returns global power stats. Parameters: None"); 2794 rte_eal_mp_remote_launch(main_telemetry_loop, NULL, 2795 SKIP_MAIN); 2796 } else if (app_mode == APP_MODE_INTERRUPT) { 2797 rte_eal_mp_remote_launch(main_intr_loop, NULL, CALL_MAIN); 2798 } 2799 2800 if (app_mode == APP_MODE_EMPTY_POLL || app_mode == APP_MODE_TELEMETRY) 2801 launch_timer(rte_lcore_id()); 2802 2803 RTE_LCORE_FOREACH_WORKER(lcore_id) { 2804 if (rte_eal_wait_lcore(lcore_id) < 0) 2805 return -1; 2806 } 2807 2808 RTE_ETH_FOREACH_DEV(portid) 2809 { 2810 if ((enabled_port_mask & (1 << portid)) == 0) 2811 continue; 2812 2813 ret = rte_eth_dev_stop(portid); 2814 if (ret != 0) 2815 RTE_LOG(ERR, L3FWD_POWER, "rte_eth_dev_stop: err=%d, port=%u\n", 2816 ret, portid); 2817 2818 rte_eth_dev_close(portid); 2819 } 2820 2821 if (app_mode == APP_MODE_EMPTY_POLL) 2822 rte_power_empty_poll_stat_free(); 2823 2824 if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) && 2825 deinit_power_library()) 2826 rte_exit(EXIT_FAILURE, "deinit_power_library failed\n"); 2827 2828 if (rte_eal_cleanup() < 0) 2829 RTE_LOG(ERR, L3FWD_POWER, "EAL cleanup failed\n"); 2830 2831 return 0; 2832 } 2833