1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdint.h> 8 #include <inttypes.h> 9 #include <sys/types.h> 10 #include <string.h> 11 #include <sys/queue.h> 12 #include <stdarg.h> 13 #include <errno.h> 14 #include <getopt.h> 15 #include <unistd.h> 16 #include <signal.h> 17 18 #include <rte_common.h> 19 #include <rte_byteorder.h> 20 #include <rte_log.h> 21 #include <rte_malloc.h> 22 #include <rte_memory.h> 23 #include <rte_memcpy.h> 24 #include <rte_eal.h> 25 #include <rte_launch.h> 26 #include <rte_atomic.h> 27 #include <rte_cycles.h> 28 #include <rte_prefetch.h> 29 #include <rte_lcore.h> 30 #include <rte_per_lcore.h> 31 #include <rte_branch_prediction.h> 32 #include <rte_interrupts.h> 33 #include <rte_random.h> 34 #include <rte_debug.h> 35 #include <rte_ether.h> 36 #include <rte_ethdev.h> 37 #include <rte_mempool.h> 38 #include <rte_mbuf.h> 39 #include <rte_ip.h> 40 #include <rte_tcp.h> 41 #include <rte_udp.h> 42 #include <rte_string_fns.h> 43 #include <rte_timer.h> 44 #include <rte_power.h> 45 #include <rte_spinlock.h> 46 #include <rte_power_empty_poll.h> 47 48 #include "perf_core.h" 49 #include "main.h" 50 51 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 52 53 #define MAX_PKT_BURST 32 54 55 #define MIN_ZERO_POLL_COUNT 10 56 57 /* 100 ms interval */ 58 #define TIMER_NUMBER_PER_SECOND 10 59 /* (10ms) */ 60 #define INTERVALS_PER_SECOND 100 61 /* 100000 us */ 62 #define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND) 63 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25 64 65 #define APP_LOOKUP_EXACT_MATCH 0 66 #define APP_LOOKUP_LPM 1 67 #define DO_RFC_1812_CHECKS 68 69 #ifndef APP_LOOKUP_METHOD 70 #define APP_LOOKUP_METHOD APP_LOOKUP_LPM 71 #endif 72 73 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 74 #include <rte_hash.h> 75 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 76 #include <rte_lpm.h> 77 #else 78 #error "APP_LOOKUP_METHOD set to incorrect value" 79 #endif 80 81 #ifndef IPv6_BYTES 82 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ 83 "%02x%02x:%02x%02x:%02x%02x:%02x%02x" 84 #define IPv6_BYTES(addr) \ 85 addr[0], addr[1], addr[2], addr[3], \ 86 addr[4], addr[5], addr[6], addr[7], \ 87 addr[8], addr[9], addr[10], addr[11],\ 88 addr[12], addr[13],addr[14], addr[15] 89 #endif 90 91 #define MAX_JUMBO_PKT_LEN 9600 92 93 #define IPV6_ADDR_LEN 16 94 95 #define MEMPOOL_CACHE_SIZE 256 96 97 /* 98 * This expression is used to calculate the number of mbufs needed depending on 99 * user input, taking into account memory for rx and tx hardware rings, cache 100 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that 101 * NB_MBUF never goes below a minimum value of 8192. 102 */ 103 104 #define NB_MBUF RTE_MAX ( \ 105 (nb_ports*nb_rx_queue*nb_rxd + \ 106 nb_ports*nb_lcores*MAX_PKT_BURST + \ 107 nb_ports*n_tx_queue*nb_txd + \ 108 nb_lcores*MEMPOOL_CACHE_SIZE), \ 109 (unsigned)8192) 110 111 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 112 113 #define NB_SOCKETS 8 114 115 /* Configure how many packets ahead to prefetch, when reading packets */ 116 #define PREFETCH_OFFSET 3 117 118 /* 119 * Configurable number of RX/TX ring descriptors 120 */ 121 #define RTE_TEST_RX_DESC_DEFAULT 1024 122 #define RTE_TEST_TX_DESC_DEFAULT 1024 123 124 /* 125 * These two thresholds were decided on by running the training algorithm on 126 * a 2.5GHz Xeon. 
These defaults can be overridden by supplying non-zero values 127 * for the med_threshold and high_threshold parameters on the command line. 128 */ 129 #define EMPTY_POLL_MED_THRESHOLD 350000UL 130 #define EMPTY_POLL_HGH_THRESHOLD 580000UL 131 132 133 134 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 135 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 136 137 /* ethernet addresses of ports */ 138 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 139 140 /* ethernet addresses of ports */ 141 static rte_spinlock_t locks[RTE_MAX_ETHPORTS]; 142 143 /* mask of enabled ports */ 144 static uint32_t enabled_port_mask = 0; 145 /* Ports set in promiscuous mode off by default. */ 146 static int promiscuous_on = 0; 147 /* NUMA is enabled by default. */ 148 static int numa_on = 1; 149 /* emptypoll is disabled by default. */ 150 static bool empty_poll_on; 151 static bool empty_poll_train; 152 volatile bool empty_poll_stop; 153 static struct ep_params *ep_params; 154 static struct ep_policy policy; 155 static long ep_med_edpi, ep_hgh_edpi; 156 157 static int parse_ptype; /**< Parse packet type using rx callback, and */ 158 /**< disabled by default */ 159 160 enum freq_scale_hint_t 161 { 162 FREQ_LOWER = -1, 163 FREQ_CURRENT = 0, 164 FREQ_HIGHER = 1, 165 FREQ_HIGHEST = 2 166 }; 167 168 struct lcore_rx_queue { 169 uint16_t port_id; 170 uint8_t queue_id; 171 enum freq_scale_hint_t freq_up_hint; 172 uint32_t zero_rx_packet_count; 173 uint32_t idle_hint; 174 } __rte_cache_aligned; 175 176 #define MAX_RX_QUEUE_PER_LCORE 16 177 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS 178 #define MAX_RX_QUEUE_PER_PORT 128 179 180 #define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 181 182 183 struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; 184 static struct lcore_params lcore_params_array_default[] = { 185 {0, 0, 2}, 186 {0, 1, 2}, 187 {0, 2, 2}, 188 {1, 0, 2}, 189 {1, 1, 2}, 190 {1, 2, 2}, 191 {2, 0, 2}, 192 {3, 0, 3}, 193 {3, 1, 3}, 194 }; 195 196 struct lcore_params *lcore_params = lcore_params_array_default; 197 uint16_t nb_lcore_params = sizeof(lcore_params_array_default) / 198 sizeof(lcore_params_array_default[0]); 199 200 static struct rte_eth_conf port_conf = { 201 .rxmode = { 202 .mq_mode = ETH_MQ_RX_RSS, 203 .max_rx_pkt_len = ETHER_MAX_LEN, 204 .split_hdr_size = 0, 205 .offloads = DEV_RX_OFFLOAD_CHECKSUM, 206 }, 207 .rx_adv_conf = { 208 .rss_conf = { 209 .rss_key = NULL, 210 .rss_hf = ETH_RSS_UDP, 211 }, 212 }, 213 .txmode = { 214 .mq_mode = ETH_MQ_TX_NONE, 215 }, 216 .intr_conf = { 217 .rxq = 1, 218 }, 219 }; 220 221 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; 222 223 224 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 225 226 #ifdef RTE_ARCH_X86 227 #include <rte_hash_crc.h> 228 #define DEFAULT_HASH_FUNC rte_hash_crc 229 #else 230 #include <rte_jhash.h> 231 #define DEFAULT_HASH_FUNC rte_jhash 232 #endif 233 234 struct ipv4_5tuple { 235 uint32_t ip_dst; 236 uint32_t ip_src; 237 uint16_t port_dst; 238 uint16_t port_src; 239 uint8_t proto; 240 } __attribute__((__packed__)); 241 242 struct ipv6_5tuple { 243 uint8_t ip_dst[IPV6_ADDR_LEN]; 244 uint8_t ip_src[IPV6_ADDR_LEN]; 245 uint16_t port_dst; 246 uint16_t port_src; 247 uint8_t proto; 248 } __attribute__((__packed__)); 249 250 struct ipv4_l3fwd_route { 251 struct ipv4_5tuple key; 252 uint8_t if_out; 253 }; 254 255 struct ipv6_l3fwd_route { 256 struct ipv6_5tuple key; 257 uint8_t if_out; 258 }; 259 260 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 261 {{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, 262 
{{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, 263 {{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, 264 {{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, 265 }; 266 267 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { 268 { 269 { 270 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 271 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, 272 {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 273 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, 274 1, 10, IPPROTO_UDP 275 }, 4 276 }, 277 }; 278 279 typedef struct rte_hash lookup_struct_t; 280 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 281 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; 282 283 #define L3FWD_HASH_ENTRIES 1024 284 285 #define IPV4_L3FWD_NUM_ROUTES \ 286 (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0])) 287 288 #define IPV6_L3FWD_NUM_ROUTES \ 289 (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0])) 290 291 static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 292 static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; 293 #endif 294 295 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 296 struct ipv4_l3fwd_route { 297 uint32_t ip; 298 uint8_t depth; 299 uint8_t if_out; 300 }; 301 302 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { 303 {IPv4(1,1,1,0), 24, 0}, 304 {IPv4(2,1,1,0), 24, 1}, 305 {IPv4(3,1,1,0), 24, 2}, 306 {IPv4(4,1,1,0), 24, 3}, 307 {IPv4(5,1,1,0), 24, 4}, 308 {IPv4(6,1,1,0), 24, 5}, 309 {IPv4(7,1,1,0), 24, 6}, 310 {IPv4(8,1,1,0), 24, 7}, 311 }; 312 313 #define IPV4_L3FWD_NUM_ROUTES \ 314 (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0])) 315 316 #define IPV4_L3FWD_LPM_MAX_RULES 1024 317 318 typedef struct rte_lpm lookup_struct_t; 319 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; 320 #endif 321 322 struct lcore_conf { 323 uint16_t n_rx_queue; 324 struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 325 uint16_t n_tx_port; 326 uint16_t tx_port_id[RTE_MAX_ETHPORTS]; 327 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 328 struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; 329 lookup_struct_t * ipv4_lookup_struct; 330 lookup_struct_t * ipv6_lookup_struct; 331 } __rte_cache_aligned; 332 333 struct lcore_stats { 334 /* total sleep time in ms since last frequency scaling down */ 335 uint32_t sleep_time; 336 /* number of long sleep recently */ 337 uint32_t nb_long_sleep; 338 /* freq. scaling up trend */ 339 uint32_t trend; 340 /* total packet processed recently */ 341 uint64_t nb_rx_processed; 342 /* total iterations looped recently */ 343 uint64_t nb_iteration_looped; 344 uint32_t padding[9]; 345 } __rte_cache_aligned; 346 347 static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned; 348 static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned; 349 static struct rte_timer power_timers[RTE_MAX_LCORE]; 350 351 static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count); 352 static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \ 353 unsigned int lcore_id, uint16_t port_id, uint16_t queue_id); 354 355 356 /* 357 * These defaults are using the max frequency index (1), a medium index (9) 358 * and a typical low frequency index (14). These can be adjusted to use 359 * different indexes using the relevant command line parameters. 
 */
static uint8_t freq_tlb[] = {14, 9, 1};

static int is_done(void)
{
    return empty_poll_stop;
}

/* exit signal handler */
static void
signal_exit_now(int sigtype)
{
    unsigned lcore_id;
    unsigned int portid;
    int ret;

    if (sigtype == SIGINT) {
        if (empty_poll_on)
            empty_poll_stop = true;


        for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
            if (rte_lcore_is_enabled(lcore_id) == 0)
                continue;

            /* de-initialize the power management library */
            ret = rte_power_exit(lcore_id);
            if (ret)
                rte_exit(EXIT_FAILURE, "Power management "
                    "library de-initialization failed on "
                    "core%u\n", lcore_id);
        }

        if (!empty_poll_on) {
            RTE_ETH_FOREACH_DEV(portid) {
                if ((enabled_port_mask & (1 << portid)) == 0)
                    continue;

                rte_eth_dev_stop(portid);
                rte_eth_dev_close(portid);
            }
        }
    }

    if (!empty_poll_on)
        rte_exit(EXIT_SUCCESS, "User forced exit\n");
}

/* Frequency scale down timer callback */
static void
power_timer_cb(__attribute__((unused)) struct rte_timer *tim,
        __attribute__((unused)) void *arg)
{
    uint64_t hz;
    float sleep_time_ratio;
    unsigned lcore_id = rte_lcore_id();

    /* accumulate total execution time in us when callback is invoked */
    sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
            (float)SCALING_PERIOD;
    /**
     * check whether the frequency needs to be scaled down a step
     * if the lcore sleeps a lot.
     */
    if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
        if (rte_power_freq_down)
            rte_power_freq_down(lcore_id);
    }
    else if ((unsigned)(stats[lcore_id].nb_rx_processed /
            stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
        /**
         * scale down a step if the average number of packets per
         * iteration is less than expected.
         */
        if (rte_power_freq_down)
            rte_power_freq_down(lcore_id);
    }

    /**
     * initialize another timer according to current frequency to ensure
     * timer interval is relatively fixed.
     */
    hz = rte_get_timer_hz();
    rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND,
                SINGLE, lcore_id, power_timer_cb, NULL);

    stats[lcore_id].nb_rx_processed = 0;
    stats[lcore_id].nb_iteration_looped = 0;

    stats[lcore_id].sleep_time = 0;
}

/* Enqueue a single packet, and send burst if queue is filled */
static inline int
send_single_packet(struct rte_mbuf *m, uint16_t port)
{
    uint32_t lcore_id;
    struct lcore_conf *qconf;

    lcore_id = rte_lcore_id();
    qconf = &lcore_conf[lcore_id];

    rte_eth_tx_buffer(port, qconf->tx_queue_id[port],
            qconf->tx_buffer[port], m);

    return 0;
}

#ifdef DO_RFC_1812_CHECKS
static inline int
is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
{
    /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
    /*
     * 1. The packet length reported by the Link Layer must be large
     * enough to hold the minimum length legal IP datagram (20 bytes).
     */
    if (link_len < sizeof(struct ipv4_hdr))
        return -1;

    /* 2. The IP checksum must be correct. */
    /* this is checked in H/W */

    /*
     * 3. The IP version number must be 4. If the version number is not 4
     * then the packet may be another version of IP, such as IPng or
     * ST-II.
     */
    if (((pkt->version_ihl) >> 4) != 4)
        return -3;
    /*
     * 4.
The IP header length field must be large enough to hold the 491 * minimum length legal IP datagram (20 bytes = 5 words). 492 */ 493 if ((pkt->version_ihl & 0xf) < 5) 494 return -4; 495 496 /* 497 * 5. The IP total length field must be large enough to hold the IP 498 * datagram header, whose length is specified in the IP header length 499 * field. 500 */ 501 if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr)) 502 return -5; 503 504 return 0; 505 } 506 #endif 507 508 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 509 static void 510 print_ipv4_key(struct ipv4_5tuple key) 511 { 512 printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, " 513 "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src, 514 key.port_dst, key.port_src, key.proto); 515 } 516 static void 517 print_ipv6_key(struct ipv6_5tuple key) 518 { 519 printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", " 520 "port dst = %d, port src = %d, proto = %d\n", 521 IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src), 522 key.port_dst, key.port_src, key.proto); 523 } 524 525 static inline uint16_t 526 get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint16_t portid, 527 lookup_struct_t * ipv4_l3fwd_lookup_struct) 528 { 529 struct ipv4_5tuple key; 530 struct tcp_hdr *tcp; 531 struct udp_hdr *udp; 532 int ret = 0; 533 534 key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); 535 key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); 536 key.proto = ipv4_hdr->next_proto_id; 537 538 switch (ipv4_hdr->next_proto_id) { 539 case IPPROTO_TCP: 540 tcp = (struct tcp_hdr *)((unsigned char *)ipv4_hdr + 541 sizeof(struct ipv4_hdr)); 542 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 543 key.port_src = rte_be_to_cpu_16(tcp->src_port); 544 break; 545 546 case IPPROTO_UDP: 547 udp = (struct udp_hdr *)((unsigned char *)ipv4_hdr + 548 sizeof(struct ipv4_hdr)); 549 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 550 key.port_src = rte_be_to_cpu_16(udp->src_port); 551 break; 552 553 default: 554 key.port_dst = 0; 555 key.port_src = 0; 556 break; 557 } 558 559 /* Find destination port */ 560 ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); 561 return ((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]); 562 } 563 564 static inline uint16_t 565 get_ipv6_dst_port(struct ipv6_hdr *ipv6_hdr, uint16_t portid, 566 lookup_struct_t *ipv6_l3fwd_lookup_struct) 567 { 568 struct ipv6_5tuple key; 569 struct tcp_hdr *tcp; 570 struct udp_hdr *udp; 571 int ret = 0; 572 573 memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); 574 memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); 575 576 key.proto = ipv6_hdr->proto; 577 578 switch (ipv6_hdr->proto) { 579 case IPPROTO_TCP: 580 tcp = (struct tcp_hdr *)((unsigned char *) ipv6_hdr + 581 sizeof(struct ipv6_hdr)); 582 key.port_dst = rte_be_to_cpu_16(tcp->dst_port); 583 key.port_src = rte_be_to_cpu_16(tcp->src_port); 584 break; 585 586 case IPPROTO_UDP: 587 udp = (struct udp_hdr *)((unsigned char *) ipv6_hdr + 588 sizeof(struct ipv6_hdr)); 589 key.port_dst = rte_be_to_cpu_16(udp->dst_port); 590 key.port_src = rte_be_to_cpu_16(udp->src_port); 591 break; 592 593 default: 594 key.port_dst = 0; 595 key.port_src = 0; 596 break; 597 } 598 599 /* Find destination port */ 600 ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); 601 return ((ret < 0) ? 
            portid : ipv6_l3fwd_out_if[ret]);
}
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
static inline uint16_t
get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint16_t portid,
        lookup_struct_t *ipv4_l3fwd_lookup_struct)
{
    uint32_t next_hop;

    return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
            rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)?
            next_hop : portid);
}
#endif

static inline void
parse_ptype_one(struct rte_mbuf *m)
{
    struct ether_hdr *eth_hdr;
    uint32_t packet_type = RTE_PTYPE_UNKNOWN;
    uint16_t ether_type;

    eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
    ether_type = eth_hdr->ether_type;
    if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
        packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
    else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
        packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;

    m->packet_type = packet_type;
}

static uint16_t
cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused,
        struct rte_mbuf *pkts[], uint16_t nb_pkts,
        uint16_t max_pkts __rte_unused,
        void *user_param __rte_unused)
{
    unsigned int i;

    for (i = 0; i < nb_pkts; ++i)
        parse_ptype_one(pkts[i]);

    return nb_pkts;
}

static int
add_cb_parse_ptype(uint16_t portid, uint16_t queueid)
{
    printf("Port %d: parse packet type info in software\n", portid);
    if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL))
        return 0;

    printf("Failed to add rx callback: port=%d\n", portid);
    return -1;
}

static inline void
l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid,
                struct lcore_conf *qconf)
{
    struct ether_hdr *eth_hdr;
    struct ipv4_hdr *ipv4_hdr;
    void *d_addr_bytes;
    uint16_t dst_port;

    eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

    if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
        /* Handle IPv4 headers.*/
        ipv4_hdr =
            rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
                        sizeof(struct ether_hdr));

#ifdef DO_RFC_1812_CHECKS
        /* Check to make sure the packet is valid (RFC1812) */
        if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
            rte_pktmbuf_free(m);
            return;
        }
#endif

        dst_port = get_ipv4_dst_port(ipv4_hdr, portid,
                    qconf->ipv4_lookup_struct);
        if (dst_port >= RTE_MAX_ETHPORTS ||
                (enabled_port_mask & 1 << dst_port) == 0)
            dst_port = portid;

        /* 02:00:00:00:00:xx */
        d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
        *((uint64_t *)d_addr_bytes) =
            0x000000000002 + ((uint64_t)dst_port << 40);

#ifdef DO_RFC_1812_CHECKS
        /* Update time to live and header checksum */
        --(ipv4_hdr->time_to_live);
        ++(ipv4_hdr->hdr_checksum);
#endif

        /* src addr */
        ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);

        send_single_packet(m, dst_port);
    } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
        /* Handle IPv6 headers.*/
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
        struct ipv6_hdr *ipv6_hdr;

        ipv6_hdr =
            rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
                        sizeof(struct ether_hdr));

        dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
                    qconf->ipv6_lookup_struct);

        if (dst_port >= RTE_MAX_ETHPORTS ||
                (enabled_port_mask & 1 << dst_port) == 0)
            dst_port = portid;

        /* 02:00:00:00:00:xx */
        d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
        *((uint64_t *)d_addr_bytes) =
            0x000000000002 + ((uint64_t)dst_port << 40);

        /* src addr */
        ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);

        send_single_packet(m, dst_port);
#else
        /* We don't currently handle IPv6 packets in LPM mode. */
        rte_pktmbuf_free(m);
#endif
    } else
        rte_pktmbuf_free(m);

}

#define MINIMUM_SLEEP_TIME         1
#define SUSPEND_THRESHOLD          300

static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
    /* If the zero-rx count is below the suspend threshold, sleep 1 us */
    if (zero_rx_packet_count < SUSPEND_THRESHOLD)
        return MINIMUM_SLEEP_TIME;
    /* Otherwise sleep SUSPEND_THRESHOLD us, enough to cover the
       latency of switching from C3/C6 back to C0
     */
    else
        return SUSPEND_THRESHOLD;
}

static inline enum freq_scale_hint_t
power_freq_scaleup_heuristic(unsigned lcore_id,
                 uint16_t port_id,
                 uint16_t queue_id)
{
    uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id);
/**
 * HW Rx queue size is 128 by default, Rx burst read at maximum 32 entries
 * per iteration
 */
#define FREQ_GEAR1_RX_PACKET_THRESHOLD             MAX_PKT_BURST
#define FREQ_GEAR2_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*2)
#define FREQ_GEAR3_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*3)
#define FREQ_UP_TREND1_ACC   1
#define FREQ_UP_TREND2_ACC   100
#define FREQ_UP_THRESHOLD    10000

    if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) {
        stats[lcore_id].trend = 0;
        return FREQ_HIGHEST;
    } else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD))
        stats[lcore_id].trend += FREQ_UP_TREND2_ACC;
    else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD))
        stats[lcore_id].trend += FREQ_UP_TREND1_ACC;

    if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) {
        stats[lcore_id].trend = 0;
        return FREQ_HIGHER;
    }

    return FREQ_CURRENT;
}

/**
 * force polling thread sleep until one-shot rx interrupt triggers
 * @param num
 *  Maximum number of Rx interrupt events to wait for (one per Rx queue).
795 * @return 796 * 0 on success 797 */ 798 static int 799 sleep_until_rx_interrupt(int num) 800 { 801 struct rte_epoll_event event[num]; 802 int n, i; 803 uint16_t port_id; 804 uint8_t queue_id; 805 void *data; 806 807 RTE_LOG(INFO, L3FWD_POWER, 808 "lcore %u sleeps until interrupt triggers\n", 809 rte_lcore_id()); 810 811 n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, -1); 812 for (i = 0; i < n; i++) { 813 data = event[i].epdata.data; 814 port_id = ((uintptr_t)data) >> CHAR_BIT; 815 queue_id = ((uintptr_t)data) & 816 RTE_LEN2MASK(CHAR_BIT, uint8_t); 817 rte_eth_dev_rx_intr_disable(port_id, queue_id); 818 RTE_LOG(INFO, L3FWD_POWER, 819 "lcore %u is waked up from rx interrupt on" 820 " port %d queue %d\n", 821 rte_lcore_id(), port_id, queue_id); 822 } 823 824 return 0; 825 } 826 827 static void turn_on_intr(struct lcore_conf *qconf) 828 { 829 int i; 830 struct lcore_rx_queue *rx_queue; 831 uint8_t queue_id; 832 uint16_t port_id; 833 834 for (i = 0; i < qconf->n_rx_queue; ++i) { 835 rx_queue = &(qconf->rx_queue_list[i]); 836 port_id = rx_queue->port_id; 837 queue_id = rx_queue->queue_id; 838 839 rte_spinlock_lock(&(locks[port_id])); 840 rte_eth_dev_rx_intr_enable(port_id, queue_id); 841 rte_spinlock_unlock(&(locks[port_id])); 842 } 843 } 844 845 static int event_register(struct lcore_conf *qconf) 846 { 847 struct lcore_rx_queue *rx_queue; 848 uint8_t queueid; 849 uint16_t portid; 850 uint32_t data; 851 int ret; 852 int i; 853 854 for (i = 0; i < qconf->n_rx_queue; ++i) { 855 rx_queue = &(qconf->rx_queue_list[i]); 856 portid = rx_queue->port_id; 857 queueid = rx_queue->queue_id; 858 data = portid << CHAR_BIT | queueid; 859 860 ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid, 861 RTE_EPOLL_PER_THREAD, 862 RTE_INTR_EVENT_ADD, 863 (void *)((uintptr_t)data)); 864 if (ret) 865 return ret; 866 } 867 868 return 0; 869 } 870 /* main processing loop */ 871 static int 872 main_empty_poll_loop(__attribute__((unused)) void *dummy) 873 { 874 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 875 unsigned int lcore_id; 876 uint64_t prev_tsc, diff_tsc, cur_tsc; 877 int i, j, nb_rx; 878 uint8_t queueid; 879 uint16_t portid; 880 struct lcore_conf *qconf; 881 struct lcore_rx_queue *rx_queue; 882 883 const uint64_t drain_tsc = 884 (rte_get_tsc_hz() + US_PER_S - 1) / 885 US_PER_S * BURST_TX_DRAIN_US; 886 887 prev_tsc = 0; 888 889 lcore_id = rte_lcore_id(); 890 qconf = &lcore_conf[lcore_id]; 891 892 if (qconf->n_rx_queue == 0) { 893 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", 894 lcore_id); 895 return 0; 896 } 897 898 for (i = 0; i < qconf->n_rx_queue; i++) { 899 portid = qconf->rx_queue_list[i].port_id; 900 queueid = qconf->rx_queue_list[i].queue_id; 901 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 902 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 903 } 904 905 while (!is_done()) { 906 stats[lcore_id].nb_iteration_looped++; 907 908 cur_tsc = rte_rdtsc(); 909 /* 910 * TX burst queue drain 911 */ 912 diff_tsc = cur_tsc - prev_tsc; 913 if (unlikely(diff_tsc > drain_tsc)) { 914 for (i = 0; i < qconf->n_tx_port; ++i) { 915 portid = qconf->tx_port_id[i]; 916 rte_eth_tx_buffer_flush(portid, 917 qconf->tx_queue_id[portid], 918 qconf->tx_buffer[portid]); 919 } 920 prev_tsc = cur_tsc; 921 } 922 923 /* 924 * Read packet from RX queues 925 */ 926 for (i = 0; i < qconf->n_rx_queue; ++i) { 927 rx_queue = &(qconf->rx_queue_list[i]); 928 rx_queue->idle_hint = 0; 929 portid = rx_queue->port_id; 930 queueid = rx_queue->queue_id; 931 932 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 933 
MAX_PKT_BURST); 934 935 stats[lcore_id].nb_rx_processed += nb_rx; 936 937 if (nb_rx == 0) { 938 939 rte_power_empty_poll_stat_update(lcore_id); 940 941 continue; 942 } else { 943 rte_power_poll_stat_update(lcore_id, nb_rx); 944 } 945 946 947 /* Prefetch first packets */ 948 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 949 rte_prefetch0(rte_pktmbuf_mtod( 950 pkts_burst[j], void *)); 951 } 952 953 /* Prefetch and forward already prefetched packets */ 954 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 955 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 956 j + PREFETCH_OFFSET], 957 void *)); 958 l3fwd_simple_forward(pkts_burst[j], portid, 959 qconf); 960 } 961 962 /* Forward remaining prefetched packets */ 963 for (; j < nb_rx; j++) { 964 l3fwd_simple_forward(pkts_burst[j], portid, 965 qconf); 966 } 967 968 } 969 970 } 971 972 return 0; 973 } 974 /* main processing loop */ 975 static int 976 main_loop(__attribute__((unused)) void *dummy) 977 { 978 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 979 unsigned lcore_id; 980 uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz; 981 uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power; 982 int i, j, nb_rx; 983 uint8_t queueid; 984 uint16_t portid; 985 struct lcore_conf *qconf; 986 struct lcore_rx_queue *rx_queue; 987 enum freq_scale_hint_t lcore_scaleup_hint; 988 uint32_t lcore_rx_idle_count = 0; 989 uint32_t lcore_idle_hint = 0; 990 int intr_en = 0; 991 992 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; 993 994 prev_tsc = 0; 995 hz = rte_get_timer_hz(); 996 tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND; 997 998 lcore_id = rte_lcore_id(); 999 qconf = &lcore_conf[lcore_id]; 1000 1001 if (qconf->n_rx_queue == 0) { 1002 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); 1003 return 0; 1004 } 1005 1006 RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id); 1007 1008 for (i = 0; i < qconf->n_rx_queue; i++) { 1009 portid = qconf->rx_queue_list[i].port_id; 1010 queueid = qconf->rx_queue_list[i].queue_id; 1011 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " 1012 "rxqueueid=%hhu\n", lcore_id, portid, queueid); 1013 } 1014 1015 /* add into event wait list */ 1016 if (event_register(qconf) == 0) 1017 intr_en = 1; 1018 else 1019 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); 1020 1021 while (1) { 1022 stats[lcore_id].nb_iteration_looped++; 1023 1024 cur_tsc = rte_rdtsc(); 1025 cur_tsc_power = cur_tsc; 1026 1027 /* 1028 * TX burst queue drain 1029 */ 1030 diff_tsc = cur_tsc - prev_tsc; 1031 if (unlikely(diff_tsc > drain_tsc)) { 1032 for (i = 0; i < qconf->n_tx_port; ++i) { 1033 portid = qconf->tx_port_id[i]; 1034 rte_eth_tx_buffer_flush(portid, 1035 qconf->tx_queue_id[portid], 1036 qconf->tx_buffer[portid]); 1037 } 1038 prev_tsc = cur_tsc; 1039 } 1040 1041 diff_tsc_power = cur_tsc_power - prev_tsc_power; 1042 if (diff_tsc_power > tim_res_tsc) { 1043 rte_timer_manage(); 1044 prev_tsc_power = cur_tsc_power; 1045 } 1046 1047 start_rx: 1048 /* 1049 * Read packet from RX queues 1050 */ 1051 lcore_scaleup_hint = FREQ_CURRENT; 1052 lcore_rx_idle_count = 0; 1053 for (i = 0; i < qconf->n_rx_queue; ++i) { 1054 rx_queue = &(qconf->rx_queue_list[i]); 1055 rx_queue->idle_hint = 0; 1056 portid = rx_queue->port_id; 1057 queueid = rx_queue->queue_id; 1058 1059 nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, 1060 MAX_PKT_BURST); 1061 1062 stats[lcore_id].nb_rx_processed += nb_rx; 1063 if (unlikely(nb_rx == 0)) { 1064 /** 1065 * no packet received from rx queue, try 
to 1066 * sleep for a while forcing CPU enter deeper 1067 * C states. 1068 */ 1069 rx_queue->zero_rx_packet_count++; 1070 1071 if (rx_queue->zero_rx_packet_count <= 1072 MIN_ZERO_POLL_COUNT) 1073 continue; 1074 1075 rx_queue->idle_hint = power_idle_heuristic(\ 1076 rx_queue->zero_rx_packet_count); 1077 lcore_rx_idle_count++; 1078 } else { 1079 rx_queue->zero_rx_packet_count = 0; 1080 1081 /** 1082 * do not scale up frequency immediately as 1083 * user to kernel space communication is costly 1084 * which might impact packet I/O for received 1085 * packets. 1086 */ 1087 rx_queue->freq_up_hint = 1088 power_freq_scaleup_heuristic(lcore_id, 1089 portid, queueid); 1090 } 1091 1092 /* Prefetch first packets */ 1093 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 1094 rte_prefetch0(rte_pktmbuf_mtod( 1095 pkts_burst[j], void *)); 1096 } 1097 1098 /* Prefetch and forward already prefetched packets */ 1099 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 1100 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 1101 j + PREFETCH_OFFSET], void *)); 1102 l3fwd_simple_forward(pkts_burst[j], portid, 1103 qconf); 1104 } 1105 1106 /* Forward remaining prefetched packets */ 1107 for (; j < nb_rx; j++) { 1108 l3fwd_simple_forward(pkts_burst[j], portid, 1109 qconf); 1110 } 1111 } 1112 1113 if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) { 1114 for (i = 1, lcore_scaleup_hint = 1115 qconf->rx_queue_list[0].freq_up_hint; 1116 i < qconf->n_rx_queue; ++i) { 1117 rx_queue = &(qconf->rx_queue_list[i]); 1118 if (rx_queue->freq_up_hint > 1119 lcore_scaleup_hint) 1120 lcore_scaleup_hint = 1121 rx_queue->freq_up_hint; 1122 } 1123 1124 if (lcore_scaleup_hint == FREQ_HIGHEST) { 1125 if (rte_power_freq_max) 1126 rte_power_freq_max(lcore_id); 1127 } else if (lcore_scaleup_hint == FREQ_HIGHER) { 1128 if (rte_power_freq_up) 1129 rte_power_freq_up(lcore_id); 1130 } 1131 } else { 1132 /** 1133 * All Rx queues empty in recent consecutive polls, 1134 * sleep in a conservative manner, meaning sleep as 1135 * less as possible. 1136 */ 1137 for (i = 1, lcore_idle_hint = 1138 qconf->rx_queue_list[0].idle_hint; 1139 i < qconf->n_rx_queue; ++i) { 1140 rx_queue = &(qconf->rx_queue_list[i]); 1141 if (rx_queue->idle_hint < lcore_idle_hint) 1142 lcore_idle_hint = rx_queue->idle_hint; 1143 } 1144 1145 if (lcore_idle_hint < SUSPEND_THRESHOLD) 1146 /** 1147 * execute "pause" instruction to avoid context 1148 * switch which generally take hundred of 1149 * microseconds for short sleep. 
                 */
                rte_delay_us(lcore_idle_hint);
            else {
                /* suspend until rx interrupt triggers */
                if (intr_en) {
                    turn_on_intr(qconf);
                    sleep_until_rx_interrupt(
                        qconf->n_rx_queue);
                    /**
                     * start receiving packets immediately
                     */
                    goto start_rx;
                }
            }
            stats[lcore_id].sleep_time += lcore_idle_hint;
        }
    }
}

static int
check_lcore_params(void)
{
    uint8_t queue, lcore;
    uint16_t i;
    int socketid;

    for (i = 0; i < nb_lcore_params; ++i) {
        queue = lcore_params[i].queue_id;
        if (queue >= MAX_RX_QUEUE_PER_PORT) {
            printf("invalid queue number: %hhu\n", queue);
            return -1;
        }
        lcore = lcore_params[i].lcore_id;
        if (!rte_lcore_is_enabled(lcore)) {
            printf("error: lcore %hhu is not enabled in lcore "
                            "mask\n", lcore);
            return -1;
        }
        if (((socketid = rte_lcore_to_socket_id(lcore)) != 0) &&
                            (numa_on == 0)) {
            printf("warning: lcore %hhu is on socket %d with numa "
                        "off\n", lcore, socketid);
        }
    }
    return 0;
}

static int
check_port_config(void)
{
    unsigned portid;
    uint16_t i;

    for (i = 0; i < nb_lcore_params; ++i) {
        portid = lcore_params[i].port_id;
        if ((enabled_port_mask & (1 << portid)) == 0) {
            printf("port %u is not enabled in port mask\n",
                portid);
            return -1;
        }
        if (!rte_eth_dev_is_valid_port(portid)) {
            printf("port %u is not present on the board\n",
                portid);
            return -1;
        }
    }
    return 0;
}

static uint8_t
get_port_n_rx_queues(const uint16_t port)
{
    int queue = -1;
    uint16_t i;

    for (i = 0; i < nb_lcore_params; ++i) {
        if (lcore_params[i].port_id == port &&
                lcore_params[i].queue_id > queue)
            queue = lcore_params[i].queue_id;
    }
    return (uint8_t)(++queue);
}

static int
init_lcore_rx_queues(void)
{
    uint16_t i, nb_rx_queue;
    uint8_t lcore;

    for (i = 0; i < nb_lcore_params; ++i) {
        lcore = lcore_params[i].lcore_id;
        nb_rx_queue = lcore_conf[lcore].n_rx_queue;
        if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
            printf("error: too many queues (%u) for lcore: %u\n",
                (unsigned)nb_rx_queue + 1, (unsigned)lcore);
            return -1;
        } else {
            lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
                lcore_params[i].port_id;
            lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
                lcore_params[i].queue_id;
            lcore_conf[lcore].n_rx_queue++;
        }
    }
    return 0;
}

/* display usage */
static void
print_usage(const char *prgname)
{
    printf ("%s [EAL options] -- -p PORTMASK -P"
        " [--config (port,queue,lcore)[,(port,queue,lcore)]]"
        " [--high-perf-cores CORELIST]"
        " [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index)]]"
        " [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
        " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
        " -P : enable promiscuous mode\n"
        " --config (port,queue,lcore): rx queues configuration\n"
        " --high-perf-cores CORELIST: list of high performance cores\n"
        " --perf-config: similar to --config, but cores are specified as indices"
        " for bins containing high or regular performance cores\n"
        " --no-numa: optional, disable numa awareness\n"
        " --enable-jumbo: enable jumbo frame"
        " which max packet len is PKTLEN in decimal (64-9600)\n"
        " --parse-ptype: parse packet type by software\n"
        " --empty-poll: enable empty poll detection,"
        " followed by (training_flag, med_threshold, high_threshold)\n",
                            prgname);
}

static int parse_max_pkt_len(const char *pktlen)
{
    char *end = NULL;
    unsigned long len;

    /* parse decimal string */
    len = strtoul(pktlen, &end, 10);
    if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
        return -1;

    if (len == 0)
        return -1;

    return len;
}

static int
parse_portmask(const char *portmask)
{
    char *end = NULL;
    unsigned long pm;

    /* parse hexadecimal string */
    pm = strtoul(portmask, &end, 16);
    if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
        return -1;

    if (pm == 0)
        return -1;

    return pm;
}

static int
parse_config(const char *q_arg)
{
    char s[256];
    const char *p, *p0 = q_arg;
    char *end;
    enum fieldnames {
        FLD_PORT = 0,
        FLD_QUEUE,
        FLD_LCORE,
        _NUM_FLD
    };
    unsigned long int_fld[_NUM_FLD];
    char *str_fld[_NUM_FLD];
    int i;
    unsigned size;

    nb_lcore_params = 0;

    while ((p = strchr(p0,'(')) != NULL) {
        ++p;
        if((p0 = strchr(p,')')) == NULL)
            return -1;

        size = p0 - p;
        if(size >= sizeof(s))
            return -1;

        snprintf(s, sizeof(s), "%.*s", size, p);
        if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
                                _NUM_FLD)
            return -1;
        for (i = 0; i < _NUM_FLD; i++){
            errno = 0;
            int_fld[i] = strtoul(str_fld[i], &end, 0);
            if (errno != 0 || end == str_fld[i] || int_fld[i] >
                                    255)
                return -1;
        }
        if (nb_lcore_params >= MAX_LCORE_PARAMS) {
            printf("exceeded max number of lcore params: %hu\n",
                nb_lcore_params);
            return -1;
        }
        lcore_params_array[nb_lcore_params].port_id =
            (uint8_t)int_fld[FLD_PORT];
        lcore_params_array[nb_lcore_params].queue_id =
            (uint8_t)int_fld[FLD_QUEUE];
        lcore_params_array[nb_lcore_params].lcore_id =
            (uint8_t)int_fld[FLD_LCORE];
        ++nb_lcore_params;
    }
    lcore_params = lcore_params_array;

    return 0;
}
static int
parse_ep_config(const char *q_arg)
{
    char s[256];
    const char *p = q_arg;
    char *end;
    int num_arg;

    char *str_fld[3];

    int training_flag;
    int med_edpi;
    int hgh_edpi;

    ep_med_edpi = EMPTY_POLL_MED_THRESHOLD;
    ep_hgh_edpi = EMPTY_POLL_HGH_THRESHOLD;

    strlcpy(s, p, sizeof(s));

    num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ',');

    empty_poll_train = false;

    if (num_arg == 0)
        return 0;

    if (num_arg == 3) {

        training_flag = strtoul(str_fld[0], &end, 0);
        med_edpi = strtoul(str_fld[1], &end, 0);
        hgh_edpi = strtoul(str_fld[2], &end, 0);

        if (training_flag == 1)
            empty_poll_train = true;

        if (med_edpi > 0)
            ep_med_edpi = med_edpi;

        if (hgh_edpi > 0)
            ep_hgh_edpi = hgh_edpi;

    } else {

        return -1;
    }

    return 0;

}
#define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"

/* Parse the argument given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
    int opt, ret;
    char **argvopt;
    int option_index;
    uint32_t limit;
    char *prgname = argv[0];
    static struct option lgopts[] = {
        {"config", 1, 0, 0},
{"perf-config", 1, 0, 0}, 1433 {"high-perf-cores", 1, 0, 0}, 1434 {"no-numa", 0, 0, 0}, 1435 {"enable-jumbo", 0, 0, 0}, 1436 {"empty-poll", 1, 0, 0}, 1437 {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, 1438 {NULL, 0, 0, 0} 1439 }; 1440 1441 argvopt = argv; 1442 1443 while ((opt = getopt_long(argc, argvopt, "p:l:m:h:P", 1444 lgopts, &option_index)) != EOF) { 1445 1446 switch (opt) { 1447 /* portmask */ 1448 case 'p': 1449 enabled_port_mask = parse_portmask(optarg); 1450 if (enabled_port_mask == 0) { 1451 printf("invalid portmask\n"); 1452 print_usage(prgname); 1453 return -1; 1454 } 1455 break; 1456 case 'P': 1457 printf("Promiscuous mode selected\n"); 1458 promiscuous_on = 1; 1459 break; 1460 case 'l': 1461 limit = parse_max_pkt_len(optarg); 1462 freq_tlb[LOW] = limit; 1463 break; 1464 case 'm': 1465 limit = parse_max_pkt_len(optarg); 1466 freq_tlb[MED] = limit; 1467 break; 1468 case 'h': 1469 limit = parse_max_pkt_len(optarg); 1470 freq_tlb[HGH] = limit; 1471 break; 1472 /* long options */ 1473 case 0: 1474 if (!strncmp(lgopts[option_index].name, "config", 6)) { 1475 ret = parse_config(optarg); 1476 if (ret) { 1477 printf("invalid config\n"); 1478 print_usage(prgname); 1479 return -1; 1480 } 1481 } 1482 1483 if (!strncmp(lgopts[option_index].name, 1484 "perf-config", 11)) { 1485 ret = parse_perf_config(optarg); 1486 if (ret) { 1487 printf("invalid perf-config\n"); 1488 print_usage(prgname); 1489 return -1; 1490 } 1491 } 1492 1493 if (!strncmp(lgopts[option_index].name, 1494 "high-perf-cores", 15)) { 1495 ret = parse_perf_core_list(optarg); 1496 if (ret) { 1497 printf("invalid high-perf-cores\n"); 1498 print_usage(prgname); 1499 return -1; 1500 } 1501 } 1502 1503 if (!strncmp(lgopts[option_index].name, 1504 "no-numa", 7)) { 1505 printf("numa is disabled \n"); 1506 numa_on = 0; 1507 } 1508 1509 if (!strncmp(lgopts[option_index].name, 1510 "empty-poll", 10)) { 1511 printf("empty-poll is enabled\n"); 1512 empty_poll_on = true; 1513 ret = parse_ep_config(optarg); 1514 1515 if (ret) { 1516 printf("invalid empty poll config\n"); 1517 print_usage(prgname); 1518 return -1; 1519 } 1520 1521 } 1522 1523 if (!strncmp(lgopts[option_index].name, 1524 "enable-jumbo", 12)) { 1525 struct option lenopts = 1526 {"max-pkt-len", required_argument, \ 1527 0, 0}; 1528 1529 printf("jumbo frame is enabled \n"); 1530 port_conf.rxmode.offloads |= 1531 DEV_RX_OFFLOAD_JUMBO_FRAME; 1532 port_conf.txmode.offloads |= 1533 DEV_TX_OFFLOAD_MULTI_SEGS; 1534 1535 /** 1536 * if no max-pkt-len set, use the default value 1537 * ETHER_MAX_LEN 1538 */ 1539 if (0 == getopt_long(argc, argvopt, "", 1540 &lenopts, &option_index)) { 1541 ret = parse_max_pkt_len(optarg); 1542 if ((ret < 64) || 1543 (ret > MAX_JUMBO_PKT_LEN)){ 1544 printf("invalid packet " 1545 "length\n"); 1546 print_usage(prgname); 1547 return -1; 1548 } 1549 port_conf.rxmode.max_rx_pkt_len = ret; 1550 } 1551 printf("set jumbo frame " 1552 "max packet length to %u\n", 1553 (unsigned int)port_conf.rxmode.max_rx_pkt_len); 1554 } 1555 1556 if (!strncmp(lgopts[option_index].name, 1557 CMD_LINE_OPT_PARSE_PTYPE, 1558 sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { 1559 printf("soft parse-ptype is enabled\n"); 1560 parse_ptype = 1; 1561 } 1562 1563 break; 1564 1565 default: 1566 print_usage(prgname); 1567 return -1; 1568 } 1569 } 1570 1571 if (optind >= 0) 1572 argv[optind-1] = prgname; 1573 1574 ret = optind-1; 1575 optind = 1; /* reset getopt lib */ 1576 return ret; 1577 } 1578 1579 static void 1580 print_ethaddr(const char *name, const struct ether_addr *eth_addr) 1581 { 1582 char 
buf[ETHER_ADDR_FMT_SIZE]; 1583 ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); 1584 printf("%s%s", name, buf); 1585 } 1586 1587 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1588 static void 1589 setup_hash(int socketid) 1590 { 1591 struct rte_hash_parameters ipv4_l3fwd_hash_params = { 1592 .name = NULL, 1593 .entries = L3FWD_HASH_ENTRIES, 1594 .key_len = sizeof(struct ipv4_5tuple), 1595 .hash_func = DEFAULT_HASH_FUNC, 1596 .hash_func_init_val = 0, 1597 }; 1598 1599 struct rte_hash_parameters ipv6_l3fwd_hash_params = { 1600 .name = NULL, 1601 .entries = L3FWD_HASH_ENTRIES, 1602 .key_len = sizeof(struct ipv6_5tuple), 1603 .hash_func = DEFAULT_HASH_FUNC, 1604 .hash_func_init_val = 0, 1605 }; 1606 1607 unsigned i; 1608 int ret; 1609 char s[64]; 1610 1611 /* create ipv4 hash */ 1612 snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); 1613 ipv4_l3fwd_hash_params.name = s; 1614 ipv4_l3fwd_hash_params.socket_id = socketid; 1615 ipv4_l3fwd_lookup_struct[socketid] = 1616 rte_hash_create(&ipv4_l3fwd_hash_params); 1617 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 1618 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 1619 "socket %d\n", socketid); 1620 1621 /* create ipv6 hash */ 1622 snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); 1623 ipv6_l3fwd_hash_params.name = s; 1624 ipv6_l3fwd_hash_params.socket_id = socketid; 1625 ipv6_l3fwd_lookup_struct[socketid] = 1626 rte_hash_create(&ipv6_l3fwd_hash_params); 1627 if (ipv6_l3fwd_lookup_struct[socketid] == NULL) 1628 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " 1629 "socket %d\n", socketid); 1630 1631 1632 /* populate the ipv4 hash */ 1633 for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { 1634 ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid], 1635 (void *) &ipv4_l3fwd_route_array[i].key); 1636 if (ret < 0) { 1637 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 1638 "l3fwd hash on socket %d\n", i, socketid); 1639 } 1640 ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out; 1641 printf("Hash: Adding key\n"); 1642 print_ipv4_key(ipv4_l3fwd_route_array[i].key); 1643 } 1644 1645 /* populate the ipv6 hash */ 1646 for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) { 1647 ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid], 1648 (void *) &ipv6_l3fwd_route_array[i].key); 1649 if (ret < 0) { 1650 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" 1651 "l3fwd hash on socket %d\n", i, socketid); 1652 } 1653 ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out; 1654 printf("Hash: Adding key\n"); 1655 print_ipv6_key(ipv6_l3fwd_route_array[i].key); 1656 } 1657 } 1658 #endif 1659 1660 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 1661 static void 1662 setup_lpm(int socketid) 1663 { 1664 unsigned i; 1665 int ret; 1666 char s[64]; 1667 1668 /* create the LPM table */ 1669 struct rte_lpm_config lpm_ipv4_config; 1670 1671 lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES; 1672 lpm_ipv4_config.number_tbl8s = 256; 1673 lpm_ipv4_config.flags = 0; 1674 1675 snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); 1676 ipv4_l3fwd_lookup_struct[socketid] = 1677 rte_lpm_create(s, socketid, &lpm_ipv4_config); 1678 if (ipv4_l3fwd_lookup_struct[socketid] == NULL) 1679 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" 1680 " on socket %d\n", socketid); 1681 1682 /* populate the LPM table */ 1683 for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { 1684 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], 1685 ipv4_l3fwd_route_array[i].ip, 1686 ipv4_l3fwd_route_array[i].depth, 1687 
ipv4_l3fwd_route_array[i].if_out); 1688 1689 if (ret < 0) { 1690 rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " 1691 "l3fwd LPM table on socket %d\n", 1692 i, socketid); 1693 } 1694 1695 printf("LPM: Adding route 0x%08x / %d (%d)\n", 1696 (unsigned)ipv4_l3fwd_route_array[i].ip, 1697 ipv4_l3fwd_route_array[i].depth, 1698 ipv4_l3fwd_route_array[i].if_out); 1699 } 1700 } 1701 #endif 1702 1703 static int 1704 init_mem(unsigned nb_mbuf) 1705 { 1706 struct lcore_conf *qconf; 1707 int socketid; 1708 unsigned lcore_id; 1709 char s[64]; 1710 1711 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 1712 if (rte_lcore_is_enabled(lcore_id) == 0) 1713 continue; 1714 1715 if (numa_on) 1716 socketid = rte_lcore_to_socket_id(lcore_id); 1717 else 1718 socketid = 0; 1719 1720 if (socketid >= NB_SOCKETS) { 1721 rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is " 1722 "out of range %d\n", socketid, 1723 lcore_id, NB_SOCKETS); 1724 } 1725 if (pktmbuf_pool[socketid] == NULL) { 1726 snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); 1727 pktmbuf_pool[socketid] = 1728 rte_pktmbuf_pool_create(s, nb_mbuf, 1729 MEMPOOL_CACHE_SIZE, 0, 1730 RTE_MBUF_DEFAULT_BUF_SIZE, 1731 socketid); 1732 if (pktmbuf_pool[socketid] == NULL) 1733 rte_exit(EXIT_FAILURE, 1734 "Cannot init mbuf pool on socket %d\n", 1735 socketid); 1736 else 1737 printf("Allocated mbuf pool on socket %d\n", 1738 socketid); 1739 1740 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 1741 setup_lpm(socketid); 1742 #else 1743 setup_hash(socketid); 1744 #endif 1745 } 1746 qconf = &lcore_conf[lcore_id]; 1747 qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid]; 1748 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1749 qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid]; 1750 #endif 1751 } 1752 return 0; 1753 } 1754 1755 /* Check the link status of all ports in up to 9s, and print them finally */ 1756 static void 1757 check_all_ports_link_status(uint32_t port_mask) 1758 { 1759 #define CHECK_INTERVAL 100 /* 100ms */ 1760 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 1761 uint8_t count, all_ports_up, print_flag = 0; 1762 uint16_t portid; 1763 struct rte_eth_link link; 1764 1765 printf("\nChecking link status"); 1766 fflush(stdout); 1767 for (count = 0; count <= MAX_CHECK_TIME; count++) { 1768 all_ports_up = 1; 1769 RTE_ETH_FOREACH_DEV(portid) { 1770 if ((port_mask & (1 << portid)) == 0) 1771 continue; 1772 memset(&link, 0, sizeof(link)); 1773 rte_eth_link_get_nowait(portid, &link); 1774 /* print link status if flag set */ 1775 if (print_flag == 1) { 1776 if (link.link_status) 1777 printf("Port %d Link Up - speed %u " 1778 "Mbps - %s\n", (uint8_t)portid, 1779 (unsigned)link.link_speed, 1780 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
1781 ("full-duplex") : ("half-duplex\n")); 1782 else 1783 printf("Port %d Link Down\n", 1784 (uint8_t)portid); 1785 continue; 1786 } 1787 /* clear all_ports_up flag if any link down */ 1788 if (link.link_status == ETH_LINK_DOWN) { 1789 all_ports_up = 0; 1790 break; 1791 } 1792 } 1793 /* after finally printing all link status, get out */ 1794 if (print_flag == 1) 1795 break; 1796 1797 if (all_ports_up == 0) { 1798 printf("."); 1799 fflush(stdout); 1800 rte_delay_ms(CHECK_INTERVAL); 1801 } 1802 1803 /* set the print_flag if all ports up or timeout */ 1804 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 1805 print_flag = 1; 1806 printf("done\n"); 1807 } 1808 } 1809 } 1810 1811 static int check_ptype(uint16_t portid) 1812 { 1813 int i, ret; 1814 int ptype_l3_ipv4 = 0; 1815 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1816 int ptype_l3_ipv6 = 0; 1817 #endif 1818 uint32_t ptype_mask = RTE_PTYPE_L3_MASK; 1819 1820 ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0); 1821 if (ret <= 0) 1822 return 0; 1823 1824 uint32_t ptypes[ret]; 1825 1826 ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret); 1827 for (i = 0; i < ret; ++i) { 1828 if (ptypes[i] & RTE_PTYPE_L3_IPV4) 1829 ptype_l3_ipv4 = 1; 1830 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1831 if (ptypes[i] & RTE_PTYPE_L3_IPV6) 1832 ptype_l3_ipv6 = 1; 1833 #endif 1834 } 1835 1836 if (ptype_l3_ipv4 == 0) 1837 printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid); 1838 1839 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) 1840 if (ptype_l3_ipv6 == 0) 1841 printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid); 1842 #endif 1843 1844 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) 1845 if (ptype_l3_ipv4) 1846 #else /* APP_LOOKUP_EXACT_MATCH */ 1847 if (ptype_l3_ipv4 && ptype_l3_ipv6) 1848 #endif 1849 return 1; 1850 1851 return 0; 1852 1853 } 1854 1855 static int 1856 init_power_library(void) 1857 { 1858 int ret = 0, lcore_id; 1859 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 1860 if (rte_lcore_is_enabled(lcore_id)) { 1861 /* init power management library */ 1862 ret = rte_power_init(lcore_id); 1863 if (ret) 1864 RTE_LOG(ERR, POWER, 1865 "Library initialization failed on core %u\n", 1866 lcore_id); 1867 } 1868 } 1869 return ret; 1870 } 1871 static void 1872 empty_poll_setup_timer(void) 1873 { 1874 int lcore_id = rte_lcore_id(); 1875 uint64_t hz = rte_get_timer_hz(); 1876 1877 struct ep_params *ep_ptr = ep_params; 1878 1879 ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND; 1880 1881 rte_timer_reset_sync(&ep_ptr->timer0, 1882 ep_ptr->interval_ticks, 1883 PERIODICAL, 1884 lcore_id, 1885 rte_empty_poll_detection, 1886 (void *)ep_ptr); 1887 1888 } 1889 static int 1890 launch_timer(unsigned int lcore_id) 1891 { 1892 int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms; 1893 1894 RTE_SET_USED(lcore_id); 1895 1896 1897 if (rte_get_master_lcore() != lcore_id) { 1898 rte_panic("timer on lcore:%d which is not master core:%d\n", 1899 lcore_id, 1900 rte_get_master_lcore()); 1901 } 1902 1903 RTE_LOG(INFO, POWER, "Bring up the Timer\n"); 1904 1905 empty_poll_setup_timer(); 1906 1907 cycles_10ms = rte_get_timer_hz() / 100; 1908 1909 while (!is_done()) { 1910 cur_tsc = rte_rdtsc(); 1911 diff_tsc = cur_tsc - prev_tsc; 1912 if (diff_tsc > cycles_10ms) { 1913 rte_timer_manage(); 1914 prev_tsc = cur_tsc; 1915 cycles_10ms = rte_get_timer_hz() / 100; 1916 } 1917 } 1918 1919 RTE_LOG(INFO, POWER, "Timer_subsystem is done\n"); 1920 1921 return 0; 1922 } 1923 1924 1925 int 1926 main(int argc, char **argv) 
1927 { 1928 struct lcore_conf *qconf; 1929 struct rte_eth_dev_info dev_info; 1930 struct rte_eth_txconf *txconf; 1931 int ret; 1932 uint16_t nb_ports; 1933 uint16_t queueid; 1934 unsigned lcore_id; 1935 uint64_t hz; 1936 uint32_t n_tx_queue, nb_lcores; 1937 uint32_t dev_rxq_num, dev_txq_num; 1938 uint8_t nb_rx_queue, queue, socketid; 1939 uint16_t portid; 1940 1941 /* catch SIGINT and restore cpufreq governor to ondemand */ 1942 signal(SIGINT, signal_exit_now); 1943 1944 /* init EAL */ 1945 ret = rte_eal_init(argc, argv); 1946 if (ret < 0) 1947 rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); 1948 argc -= ret; 1949 argv += ret; 1950 1951 /* init RTE timer library to be used late */ 1952 rte_timer_subsystem_init(); 1953 1954 /* parse application arguments (after the EAL ones) */ 1955 ret = parse_args(argc, argv); 1956 if (ret < 0) 1957 rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); 1958 1959 if (init_power_library()) 1960 RTE_LOG(ERR, L3FWD_POWER, "init_power_library failed\n"); 1961 1962 if (update_lcore_params() < 0) 1963 rte_exit(EXIT_FAILURE, "update_lcore_params failed\n"); 1964 1965 if (check_lcore_params() < 0) 1966 rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); 1967 1968 ret = init_lcore_rx_queues(); 1969 if (ret < 0) 1970 rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n"); 1971 1972 nb_ports = rte_eth_dev_count_avail(); 1973 1974 if (check_port_config() < 0) 1975 rte_exit(EXIT_FAILURE, "check_port_config failed\n"); 1976 1977 nb_lcores = rte_lcore_count(); 1978 1979 /* initialize all ports */ 1980 RTE_ETH_FOREACH_DEV(portid) { 1981 struct rte_eth_conf local_port_conf = port_conf; 1982 1983 /* skip ports that are not enabled */ 1984 if ((enabled_port_mask & (1 << portid)) == 0) { 1985 printf("\nSkipping disabled port %d\n", portid); 1986 continue; 1987 } 1988 1989 /* init port */ 1990 printf("Initializing port %d ... ", portid ); 1991 fflush(stdout); 1992 1993 rte_eth_dev_info_get(portid, &dev_info); 1994 dev_rxq_num = dev_info.max_rx_queues; 1995 dev_txq_num = dev_info.max_tx_queues; 1996 1997 nb_rx_queue = get_port_n_rx_queues(portid); 1998 if (nb_rx_queue > dev_rxq_num) 1999 rte_exit(EXIT_FAILURE, 2000 "Cannot configure not existed rxq: " 2001 "port=%d\n", portid); 2002 2003 n_tx_queue = nb_lcores; 2004 if (n_tx_queue > dev_txq_num) 2005 n_tx_queue = dev_txq_num; 2006 printf("Creating queues: nb_rxq=%d nb_txq=%u... 
", 2007 nb_rx_queue, (unsigned)n_tx_queue ); 2008 /* If number of Rx queue is 0, no need to enable Rx interrupt */ 2009 if (nb_rx_queue == 0) 2010 local_port_conf.intr_conf.rxq = 0; 2011 rte_eth_dev_info_get(portid, &dev_info); 2012 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 2013 local_port_conf.txmode.offloads |= 2014 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 2015 2016 local_port_conf.rx_adv_conf.rss_conf.rss_hf &= 2017 dev_info.flow_type_rss_offloads; 2018 if (local_port_conf.rx_adv_conf.rss_conf.rss_hf != 2019 port_conf.rx_adv_conf.rss_conf.rss_hf) { 2020 printf("Port %u modified RSS hash function based on hardware support," 2021 "requested:%#"PRIx64" configured:%#"PRIx64"\n", 2022 portid, 2023 port_conf.rx_adv_conf.rss_conf.rss_hf, 2024 local_port_conf.rx_adv_conf.rss_conf.rss_hf); 2025 } 2026 2027 ret = rte_eth_dev_configure(portid, nb_rx_queue, 2028 (uint16_t)n_tx_queue, &local_port_conf); 2029 if (ret < 0) 2030 rte_exit(EXIT_FAILURE, "Cannot configure device: " 2031 "err=%d, port=%d\n", ret, portid); 2032 2033 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, 2034 &nb_txd); 2035 if (ret < 0) 2036 rte_exit(EXIT_FAILURE, 2037 "Cannot adjust number of descriptors: err=%d, port=%d\n", 2038 ret, portid); 2039 2040 rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 2041 print_ethaddr(" Address:", &ports_eth_addr[portid]); 2042 printf(", "); 2043 2044 /* init memory */ 2045 ret = init_mem(NB_MBUF); 2046 if (ret < 0) 2047 rte_exit(EXIT_FAILURE, "init_mem failed\n"); 2048 2049 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2050 if (rte_lcore_is_enabled(lcore_id) == 0) 2051 continue; 2052 2053 /* Initialize TX buffers */ 2054 qconf = &lcore_conf[lcore_id]; 2055 qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", 2056 RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, 2057 rte_eth_dev_socket_id(portid)); 2058 if (qconf->tx_buffer[portid] == NULL) 2059 rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n", 2060 portid); 2061 2062 rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST); 2063 } 2064 2065 /* init one TX queue per couple (lcore,port) */ 2066 queueid = 0; 2067 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2068 if (rte_lcore_is_enabled(lcore_id) == 0) 2069 continue; 2070 2071 if (queueid >= dev_txq_num) 2072 continue; 2073 2074 if (numa_on) 2075 socketid = \ 2076 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2077 else 2078 socketid = 0; 2079 2080 printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); 2081 fflush(stdout); 2082 2083 txconf = &dev_info.default_txconf; 2084 txconf->offloads = local_port_conf.txmode.offloads; 2085 ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, 2086 socketid, txconf); 2087 if (ret < 0) 2088 rte_exit(EXIT_FAILURE, 2089 "rte_eth_tx_queue_setup: err=%d, " 2090 "port=%d\n", ret, portid); 2091 2092 qconf = &lcore_conf[lcore_id]; 2093 qconf->tx_queue_id[portid] = queueid; 2094 queueid++; 2095 2096 qconf->tx_port_id[qconf->n_tx_port] = portid; 2097 qconf->n_tx_port++; 2098 } 2099 printf("\n"); 2100 } 2101 2102 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 2103 if (rte_lcore_is_enabled(lcore_id) == 0) 2104 continue; 2105 2106 if (empty_poll_on == false) { 2107 /* init timer structures for each enabled lcore */ 2108 rte_timer_init(&power_timers[lcore_id]); 2109 hz = rte_get_timer_hz(); 2110 rte_timer_reset(&power_timers[lcore_id], 2111 hz/TIMER_NUMBER_PER_SECOND, 2112 SINGLE, lcore_id, 2113 power_timer_cb, NULL); 2114 } 2115 qconf = &lcore_conf[lcore_id]; 2116 printf("\nInitializing rx queues 
on lcore %u ... ", lcore_id ); 2117 fflush(stdout); 2118 /* init RX queues */ 2119 for(queue = 0; queue < qconf->n_rx_queue; ++queue) { 2120 struct rte_eth_rxconf rxq_conf; 2121 struct rte_eth_dev *dev; 2122 struct rte_eth_conf *conf; 2123 2124 portid = qconf->rx_queue_list[queue].port_id; 2125 queueid = qconf->rx_queue_list[queue].queue_id; 2126 dev = &rte_eth_devices[portid]; 2127 conf = &dev->data->dev_conf; 2128 2129 if (numa_on) 2130 socketid = \ 2131 (uint8_t)rte_lcore_to_socket_id(lcore_id); 2132 else 2133 socketid = 0; 2134 2135 printf("rxq=%d,%d,%d ", portid, queueid, socketid); 2136 fflush(stdout); 2137 2138 rte_eth_dev_info_get(portid, &dev_info); 2139 rxq_conf = dev_info.default_rxconf; 2140 rxq_conf.offloads = conf->rxmode.offloads; 2141 ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, 2142 socketid, &rxq_conf, 2143 pktmbuf_pool[socketid]); 2144 if (ret < 0) 2145 rte_exit(EXIT_FAILURE, 2146 "rte_eth_rx_queue_setup: err=%d, " 2147 "port=%d\n", ret, portid); 2148 2149 if (parse_ptype) { 2150 if (add_cb_parse_ptype(portid, queueid) < 0) 2151 rte_exit(EXIT_FAILURE, 2152 "Fail to add ptype cb\n"); 2153 } else if (!check_ptype(portid)) 2154 rte_exit(EXIT_FAILURE, 2155 "PMD can not provide needed ptypes\n"); 2156 } 2157 } 2158 2159 printf("\n"); 2160 2161 /* start ports */ 2162 RTE_ETH_FOREACH_DEV(portid) { 2163 if ((enabled_port_mask & (1 << portid)) == 0) { 2164 continue; 2165 } 2166 /* Start device */ 2167 ret = rte_eth_dev_start(portid); 2168 if (ret < 0) 2169 rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, " 2170 "port=%d\n", ret, portid); 2171 /* 2172 * If enabled, put device in promiscuous mode. 2173 * This allows IO forwarding mode to forward packets 2174 * to itself through 2 cross-connected ports of the 2175 * target machine. 2176 */ 2177 if (promiscuous_on) 2178 rte_eth_promiscuous_enable(portid); 2179 /* initialize spinlock for each port */ 2180 rte_spinlock_init(&(locks[portid])); 2181 } 2182 2183 check_all_ports_link_status(enabled_port_mask); 2184 2185 if (empty_poll_on == true) { 2186 2187 if (empty_poll_train) { 2188 policy.state = TRAINING; 2189 } else { 2190 policy.state = MED_NORMAL; 2191 policy.med_base_edpi = ep_med_edpi; 2192 policy.hgh_base_edpi = ep_hgh_edpi; 2193 } 2194 2195 ret = rte_power_empty_poll_stat_init(&ep_params, 2196 freq_tlb, 2197 &policy); 2198 if (ret < 0) 2199 rte_exit(EXIT_FAILURE, "empty poll init failed"); 2200 } 2201 2202 2203 /* launch per-lcore init on every lcore */ 2204 if (empty_poll_on == false) { 2205 rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); 2206 } else { 2207 empty_poll_stop = false; 2208 rte_eal_mp_remote_launch(main_empty_poll_loop, NULL, 2209 SKIP_MASTER); 2210 } 2211 2212 if (empty_poll_on == true) 2213 launch_timer(rte_lcore_id()); 2214 2215 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 2216 if (rte_eal_wait_lcore(lcore_id) < 0) 2217 return -1; 2218 } 2219 2220 if (empty_poll_on) 2221 rte_power_empty_poll_stat_free(); 2222 2223 return 0; 2224 } 2225