/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>
#include <signal.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_string_fns.h>

#include "main.h"

#define APP_LOOKUP_EXACT_MATCH	0
#define APP_LOOKUP_LPM		1
#define DO_RFC_1812_CHECKS

#ifndef APP_LOOKUP_METHOD
#define APP_LOOKUP_METHOD	APP_LOOKUP_LPM
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
#include <rte_hash.h>
#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
#include <rte_lpm.h>
#include <rte_lpm6.h>
#else
#error "APP_LOOKUP_METHOD set to incorrect value"
#endif

#define MAX_PKT_BURST 32

#include "ipv4_rsmbl.h"

#ifndef IPv6_BYTES
#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
		       "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
#define IPv6_BYTES(addr) \
	addr[0],  addr[1],  addr[2],  addr[3], \
	addr[4],  addr[5],  addr[6],  addr[7], \
	addr[8],  addr[9],  addr[10], addr[11],\
	addr[12], addr[13], addr[14], addr[15]
#endif


#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1

#define MAX_PORTS	RTE_MAX_ETHPORTS

#define MAX_JUMBO_PKT_LEN  9600

#define IPV6_ADDR_LEN 16

#define MEMPOOL_CACHE_SIZE 256

#define BUF_SIZE	2048
#define MBUF_SIZE	\
	(BUF_SIZE + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)

#define MAX_FLOW_NUM	UINT16_MAX
#define MIN_FLOW_NUM	1
#define DEF_FLOW_NUM	0x1000

/* TTL numbers are in ms. */
#define MAX_FLOW_TTL	(3600 * MS_PER_S)
#define MIN_FLOW_TTL	1
#define DEF_FLOW_TTL	MS_PER_S

#define DEF_MBUF_NUM	0x400

/* Should be power of two. */
#define IPV4_FRAG_TBL_BUCKET_ENTRIES	2

static uint32_t max_flow_num = DEF_FLOW_NUM;
static uint32_t max_flow_ttl = DEF_FLOW_TTL;

/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */
#define TX_HTHRESH 0  /**< Default values of TX host threshold reg. */
#define TX_WTHRESH 0  /**< Default values of TX write-back threshold reg. */

#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

#define NB_SOCKETS 8

/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET	3

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[MAX_PORTS];

/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;
static int promiscuous_on = 0; /**< Ports set in promiscuous mode off by default. */
static int numa_on = 1; /**< NUMA is enabled by default. */
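/*
 * Per-port TX buffer: m_table is used as a circular buffer of mbufs,
 * with <head> pointing to the next free slot and <tail> to the next
 * packet to be transmitted.
 */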
struct mbuf_table {
	uint32_t len;
	uint32_t head;
	uint32_t tail;
	struct rte_mbuf *m_table[0];
};

struct lcore_rx_queue {
	uint8_t port_id;
	uint8_t queue_id;
} __rte_cache_aligned;

#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT MAX_PORTS
#define MAX_RX_QUEUE_PER_PORT 128

#define MAX_LCORE_PARAMS 1024
struct lcore_params {
	uint8_t port_id;
	uint8_t queue_id;
	uint8_t lcore_id;
} __rte_cache_aligned;

static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
static struct lcore_params lcore_params_array_default[] = {
	{0, 0, 2},
	{0, 1, 2},
	{0, 2, 2},
	{1, 0, 2},
	{1, 1, 2},
	{1, 2, 2},
	{2, 0, 2},
	{3, 0, 3},
	{3, 1, 3},
};

static struct lcore_params *lcore_params = lcore_params_array_default;
static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
				  sizeof(lcore_params_array_default[0]);

static struct rte_eth_conf port_conf = {
	.rxmode = {
		.mq_mode	= ETH_MQ_RX_RSS,
		.max_rx_pkt_len = ETHER_MAX_LEN,
		.split_hdr_size = 0,
		.header_split   = 0, /**< Header Split disabled */
		.hw_ip_checksum = 1, /**< IP checksum offload enabled */
		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
		.hw_strip_crc   = 0, /**< CRC stripping by hardware disabled */
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_key = NULL,
			.rss_hf  = ETH_RSS_IPV4 | ETH_RSS_IPV6,
		},
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
};

static const struct rte_eth_rxconf rx_conf = {
	.rx_thresh = {
		.pthresh = RX_PTHRESH,
		.hthresh = RX_HTHRESH,
		.wthresh = RX_WTHRESH,
	},
	.rx_free_thresh = 32,
};

static const struct rte_eth_txconf tx_conf = {
	.tx_thresh = {
		.pthresh = TX_PTHRESH,
		.hthresh = TX_HTHRESH,
		.wthresh = TX_WTHRESH,
	},
	.tx_free_thresh = 0, /* Use PMD default values */
	.tx_rs_thresh = 0, /* Use PMD default values */
	.txq_flags = 0x0,
};

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)

#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
#include <rte_hash_crc.h>
#define DEFAULT_HASH_FUNC	rte_hash_crc
#else
#include <rte_jhash.h>
#define DEFAULT_HASH_FUNC	rte_jhash
#endif

struct ipv4_5tuple {
	uint32_t ip_dst;
	uint32_t ip_src;
	uint16_t port_dst;
	uint16_t port_src;
	uint8_t  proto;
} __attribute__((__packed__));

struct ipv6_5tuple {
	uint8_t  ip_dst[IPV6_ADDR_LEN];
	uint8_t  ip_src[IPV6_ADDR_LEN];
	uint16_t port_dst;
	uint16_t port_src;
	uint8_t  proto;
} __attribute__((__packed__));

struct ipv4_l3fwd_route {
	struct ipv4_5tuple key;
	uint8_t if_out;
};

struct ipv6_l3fwd_route {
	struct ipv6_5tuple key;
	uint8_t if_out;
};

static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
	{{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0},
	{{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1},
	{{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2},
	{{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3},
};

static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
	{
		{
			{0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
			{0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a},
			1, 10, IPPROTO_UDP
		}, 4
	},
};

typedef struct rte_hash lookup_struct_t;
static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];

#define L3FWD_HASH_ENTRIES	1024

#define IPV4_L3FWD_NUM_ROUTES \
	(sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0]))

#define IPV6_L3FWD_NUM_ROUTES \
	(sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0]))

static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
struct ipv4_l3fwd_route {
	uint32_t ip;
	uint8_t  depth;
	uint8_t  if_out;
};

struct ipv6_l3fwd_route {
	uint8_t ip[16];
	uint8_t depth;
	uint8_t if_out;
};

static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
	{IPv4(1,1,1,0), 24, 0},
	{IPv4(2,1,1,0), 24, 1},
	{IPv4(3,1,1,0), 24, 2},
	{IPv4(4,1,1,0), 24, 3},
	{IPv4(5,1,1,0), 24, 4},
	{IPv4(6,1,1,0), 24, 5},
	{IPv4(7,1,1,0), 24, 6},
	{IPv4(8,1,1,0), 24, 7},
};

static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
	{{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0},
	{{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1},
	{{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2},
	{{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3},
	{{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4},
	{{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5},
	{{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6},
	{{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7},
};

#define IPV4_L3FWD_NUM_ROUTES \
	(sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0]))
#define IPV6_L3FWD_NUM_ROUTES \
	(sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0]))

#define IPV4_L3FWD_LPM_MAX_RULES	1024
#define IPV6_L3FWD_LPM_MAX_RULES	1024
#define IPV6_L3FWD_LPM_NUMBER_TBL8S	(1 << 16)

typedef struct rte_lpm lookup_struct_t;
typedef struct rte_lpm6 lookup6_struct_t;
static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
static lookup6_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
#endif

struct tx_lcore_stat {
	uint64_t call;
	uint64_t drop;
	uint64_t queue;
	uint64_t send;
};

#ifdef IPV4_FRAG_TBL_STAT
#define TX_LCORE_STAT_UPDATE(s, f, v)	((s)->f += (v))
#else
#define TX_LCORE_STAT_UPDATE(s, f, v)	do {} while (0)
#endif /* IPV4_FRAG_TBL_STAT */

struct lcore_conf {
	uint16_t n_rx_queue;
	struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
	uint16_t tx_queue_id[MAX_PORTS];
	lookup_struct_t *ipv4_lookup_struct;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
	lookup6_struct_t *ipv6_lookup_struct;
#else
	lookup_struct_t *ipv6_lookup_struct;
#endif
	struct ipv4_frag_tbl *frag_tbl[MAX_RX_QUEUE_PER_LCORE];
	struct rte_mempool *pool[MAX_RX_QUEUE_PER_LCORE];
	struct ipv4_frag_death_row death_row;
	struct mbuf_table *tx_mbufs[MAX_PORTS];
	struct tx_lcore_stat tx_stat;
} __rte_cache_aligned;

static struct lcore_conf lcore_conf[RTE_MAX_LCORE];

/*
 * If the number of queued packets reaches the given threshold, then
 * send a burst of packets on the output interface.
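 * The TX buffer for the port is drained starting from <tail>; at most
 * <len - tail> packets are sent in one call so that the burst never
 * wraps around the end of m_table.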
 */
static inline uint32_t
send_burst(struct lcore_conf *qconf, uint32_t thresh, uint8_t port)
{
	uint32_t fill, len, k, n;
	struct mbuf_table *txmb;

	txmb = qconf->tx_mbufs[port];
	len = txmb->len;

	/* fill is the number of packets currently queued in the buffer */
	if ((int32_t)(fill = txmb->head - txmb->tail) < 0)
		fill += len;

	if (fill >= thresh) {
		n = RTE_MIN(len - txmb->tail, fill);

		k = rte_eth_tx_burst(port, qconf->tx_queue_id[port],
			txmb->m_table + txmb->tail, (uint16_t)n);

		TX_LCORE_STAT_UPDATE(&qconf->tx_stat, call, 1);
		TX_LCORE_STAT_UPDATE(&qconf->tx_stat, send, k);

		fill -= k;
		if ((txmb->tail += k) == len)
			txmb->tail = 0;
	}

	return (fill);
}

/* Enqueue a single packet, and send burst if queue is filled */
static inline int
send_single_packet(struct rte_mbuf *m, uint8_t port)
{
	uint32_t fill, lcore_id, len;
	struct lcore_conf *qconf;
	struct mbuf_table *txmb;

	lcore_id = rte_lcore_id();
	qconf = &lcore_conf[lcore_id];

	txmb = qconf->tx_mbufs[port];
	len = txmb->len;

	fill = send_burst(qconf, MAX_PKT_BURST, port);

	/* if the buffer is still full, drop the oldest queued packet */
	if (fill == len - 1) {
		TX_LCORE_STAT_UPDATE(&qconf->tx_stat, drop, 1);
		rte_pktmbuf_free(txmb->m_table[txmb->tail]);
		if (++txmb->tail == len)
			txmb->tail = 0;
	}

	TX_LCORE_STAT_UPDATE(&qconf->tx_stat, queue, 1);
	txmb->m_table[txmb->head] = m;
	if (++txmb->head == len)
		txmb->head = 0;

	return (0);
}

#ifdef DO_RFC_1812_CHECKS
static inline int
is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
{
	/* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
	/*
	 * 1. The packet length reported by the Link Layer must be large
	 * enough to hold the minimum length legal IP datagram (20 bytes).
	 */
	if (link_len < sizeof(struct ipv4_hdr))
		return -1;

	/* 2. The IP checksum must be correct. */
	/* this is checked in H/W */

	/*
	 * 3. The IP version number must be 4. If the version number is not 4
	 * then the packet may be another version of IP, such as IPng or
	 * ST-II.
	 */
	if (((pkt->version_ihl) >> 4) != 4)
		return -3;
	/*
	 * 4. The IP header length field must be large enough to hold the
	 * minimum length legal IP datagram (20 bytes = 5 words).
	 */
	if ((pkt->version_ihl & 0xf) < 5)
		return -4;

	/*
	 * 5. The IP total length field must be large enough to hold the IP
	 * datagram header, whose length is specified in the IP header length
	 * field.
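	 * (total_length is kept in network byte order in the header,
	 * hence the byte-order conversion below.)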
	 */
	if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
		return -5;

	return 0;
}
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
static void
print_ipv4_key(struct ipv4_5tuple key)
{
	printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, proto = %d\n",
		(unsigned)key.ip_dst, (unsigned)key.ip_src,
		key.port_dst, key.port_src, key.proto);
}

static void
print_ipv6_key(struct ipv6_5tuple key)
{
	printf("IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", "
		"port dst = %d, port src = %d, proto = %d\n",
		IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src),
		key.port_dst, key.port_src, key.proto);
}

static inline uint8_t
get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid,
		lookup_struct_t *ipv4_l3fwd_lookup_struct)
{
	struct ipv4_5tuple key;
	struct tcp_hdr *tcp;
	struct udp_hdr *udp;
	int ret = 0;

	key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
	key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr);
	key.proto = ipv4_hdr->next_proto_id;

	switch (ipv4_hdr->next_proto_id) {
	case IPPROTO_TCP:
		tcp = (struct tcp_hdr *)((unsigned char *)ipv4_hdr +
			sizeof(struct ipv4_hdr));
		key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
		key.port_src = rte_be_to_cpu_16(tcp->src_port);
		break;

	case IPPROTO_UDP:
		udp = (struct udp_hdr *)((unsigned char *)ipv4_hdr +
			sizeof(struct ipv4_hdr));
		key.port_dst = rte_be_to_cpu_16(udp->dst_port);
		key.port_src = rte_be_to_cpu_16(udp->src_port);
		break;

	default:
		key.port_dst = 0;
		key.port_src = 0;
		break;
	}

	/* Find destination port */
	ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
	return (uint8_t)((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]);
}

static inline uint8_t
get_ipv6_dst_port(struct ipv6_hdr *ipv6_hdr, uint8_t portid,
		lookup_struct_t *ipv6_l3fwd_lookup_struct)
{
	struct ipv6_5tuple key;
	struct tcp_hdr *tcp;
	struct udp_hdr *udp;
	int ret = 0;

	memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN);
	memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN);

	key.proto = ipv6_hdr->proto;

	switch (ipv6_hdr->proto) {
	case IPPROTO_TCP:
		tcp = (struct tcp_hdr *)((unsigned char *)ipv6_hdr +
			sizeof(struct ipv6_hdr));
		key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
		key.port_src = rte_be_to_cpu_16(tcp->src_port);
		break;

	case IPPROTO_UDP:
		udp = (struct udp_hdr *)((unsigned char *)ipv6_hdr +
			sizeof(struct ipv6_hdr));
		key.port_dst = rte_be_to_cpu_16(udp->dst_port);
		key.port_src = rte_be_to_cpu_16(udp->src_port);
		break;

	default:
		key.port_dst = 0;
		key.port_src = 0;
		break;
	}

	/* Find destination port */
	ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
	return (uint8_t)((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
}
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
static inline uint8_t
get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid,
		lookup_struct_t *ipv4_l3fwd_lookup_struct)
{
	uint8_t next_hop;

	return (uint8_t)((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
		rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0) ?
		next_hop : portid);
}

static inline uint8_t
get_ipv6_dst_port(struct ipv6_hdr *ipv6_hdr, uint8_t portid,
		lookup6_struct_t *ipv6_l3fwd_lookup_struct)
{
	uint8_t next_hop;

	return (uint8_t)((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct,
		ipv6_hdr->dst_addr, &next_hop) == 0) ?
		next_hop : portid);
}
#endif

static inline void
l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, uint32_t queue,
		struct lcore_conf *qconf, uint64_t tms)
{
	struct ether_hdr *eth_hdr;
	struct ipv4_hdr *ipv4_hdr;
	void *d_addr_bytes;
	uint8_t dst_port;
	uint16_t flag_offset, ip_flag, ip_ofs;

	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

	if (m->ol_flags & PKT_RX_IPV4_HDR) {
		/* Handle IPv4 headers.*/
		ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);

#ifdef DO_RFC_1812_CHECKS
		/* Check to make sure the packet is valid (RFC1812) */
		if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt.pkt_len) < 0) {
			rte_pktmbuf_free(m);
			return;
		}

		/* Update time to live and header checksum */
		--(ipv4_hdr->time_to_live);
		++(ipv4_hdr->hdr_checksum);
#endif

		flag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
		ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK);
		ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG);

		/* if it is a fragmented packet, then try to reassemble. */
		if (ip_flag != 0 || ip_ofs != 0) {

			struct rte_mbuf *mo;
			struct ipv4_frag_tbl *tbl;
			struct ipv4_frag_death_row *dr;

			tbl = qconf->frag_tbl[queue];
			dr = &qconf->death_row;

			/* prepare mbuf: setup l2_len/l3_len. */
			m->pkt.vlan_macip.f.l2_len = sizeof(*eth_hdr);
			m->pkt.vlan_macip.f.l3_len = sizeof(*ipv4_hdr);

			/* process this fragment. */
			if ((mo = ipv4_frag_mbuf(tbl, dr, m, tms, ipv4_hdr,
					ip_ofs, ip_flag)) == NULL)
				/* no packet to send out. */
				return;

			/* we have our packet reassembled. */
			if (mo != m) {
				m = mo;
				eth_hdr = rte_pktmbuf_mtod(m,
					struct ether_hdr *);
				ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
			}
		}

		dst_port = get_ipv4_dst_port(ipv4_hdr, portid,
			qconf->ipv4_lookup_struct);
		if (dst_port >= MAX_PORTS ||
				(enabled_port_mask & 1 << dst_port) == 0)
			dst_port = portid;

		/* 02:00:00:00:00:xx */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) = 0x000000000002 +
			((uint64_t)dst_port << 40);

		/* src addr */
		ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);

		send_single_packet(m, dst_port);
	} else {
		/* Handle IPv6 headers.*/
		struct ipv6_hdr *ipv6_hdr;

		ipv6_hdr = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m, unsigned char *) +
			sizeof(struct ether_hdr));

		dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
			qconf->ipv6_lookup_struct);

		if (dst_port >= MAX_PORTS ||
				(enabled_port_mask & 1 << dst_port) == 0)
			dst_port = portid;

		/* 02:00:00:00:00:xx */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) = 0x000000000002 +
			((uint64_t)dst_port << 40);

		/* src addr */
		ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);

		send_single_packet(m, dst_port);
	}
}

/* main processing loop */
static int
main_loop(__attribute__((unused)) void *dummy)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	unsigned lcore_id;
	uint64_t diff_tsc, cur_tsc, prev_tsc;
	int i, j, nb_rx;
	uint8_t portid, queueid;
	struct lcore_conf *qconf;
	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
		US_PER_S * BURST_TX_DRAIN_US;

	prev_tsc = 0;

	lcore_id = rte_lcore_id();
	qconf = &lcore_conf[lcore_id];

	if (qconf->n_rx_queue == 0) {
		RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
		return 0;
	}

	RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);

	for (i = 0; i < qconf->n_rx_queue; i++) {

		portid = qconf->rx_queue_list[i].port_id;
		queueid = qconf->rx_queue_list[i].queue_id;
		RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n",
			lcore_id, portid, queueid);
	}

	while (1) {

		cur_tsc = rte_rdtsc();

		/*
		 * TX burst queue drain
		 */
		diff_tsc = cur_tsc - prev_tsc;
		if (unlikely(diff_tsc > drain_tsc)) {

			/*
			 * This could be optimized (use queueid instead of
			 * portid), but it is not called so often
			 */
			for (portid = 0; portid < MAX_PORTS; portid++) {
				if ((enabled_port_mask & (1 << portid)) != 0)
					send_burst(qconf, 1, portid);
			}

			prev_tsc = cur_tsc;
		}

		/*
		 * Read packets from RX queues
		 */
		for (i = 0; i < qconf->n_rx_queue; ++i) {

			portid = qconf->rx_queue_list[i].port_id;
			queueid = qconf->rx_queue_list[i].queue_id;

			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
				MAX_PKT_BURST);

			/* Prefetch first packets */
			for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
				rte_prefetch0(rte_pktmbuf_mtod(
					pkts_burst[j], void *));
			}

			/* Prefetch and forward already prefetched packets */
			for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
				rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
					j + PREFETCH_OFFSET], void *));
				l3fwd_simple_forward(pkts_burst[j], portid,
					i, qconf, cur_tsc);
			}

			/* Forward remaining prefetched packets */
			for (; j < nb_rx; j++) {
				l3fwd_simple_forward(pkts_burst[j], portid,
					i, qconf, cur_tsc);
			}

			ipv4_frag_free_death_row(&qconf->death_row,
				PREFETCH_OFFSET);
		}
	}
}

static int
check_lcore_params(void)
{
	uint8_t queue, lcore;
	uint16_t i;
	int socketid;

	for (i = 0; i < nb_lcore_params; ++i) {
		queue = lcore_params[i].queue_id;
		if (queue >= MAX_RX_QUEUE_PER_PORT) {
			printf("invalid queue number: %hhu\n", queue);
			return -1;
		}
		lcore = lcore_params[i].lcore_id;
		if (!rte_lcore_is_enabled(lcore)) {
			printf("error: lcore %hhu is not enabled in lcore mask\n",
				lcore);
			return -1;
		}
		if ((socketid = rte_lcore_to_socket_id(lcore)) != 0 &&
				numa_on == 0) {
			printf("warning: lcore %hhu is on socket %d with numa off\n",
				lcore, socketid);
		}
	}
	return 0;
}

static int
check_port_config(const unsigned nb_ports)
{
	unsigned portid;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		portid = lcore_params[i].port_id;
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("port %u is not enabled in port mask\n", portid);
			return -1;
		}
		if (portid >= nb_ports) {
			printf("port %u is not present on the board\n", portid);
			return -1;
		}
	}
	return 0;
}

static uint8_t
get_port_n_rx_queues(const uint8_t port)
{
	int queue = -1;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		if (lcore_params[i].port_id == port &&
				lcore_params[i].queue_id > queue)
			queue = lcore_params[i].queue_id;
	}
	return (uint8_t)(++queue);
}

static int
init_lcore_rx_queues(void)
{
	uint16_t i, nb_rx_queue;
	uint8_t lcore;

	for (i = 0; i < nb_lcore_params; ++i) {
		lcore = lcore_params[i].lcore_id;
		nb_rx_queue = lcore_conf[lcore].n_rx_queue;
		if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
			printf("error: too many queues (%u) for lcore: %u\n",
				(unsigned)nb_rx_queue + 1, (unsigned)lcore);
			return -1;
		} else {
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
				lcore_params[i].port_id;
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
				lcore_params[i].queue_id;
			lcore_conf[lcore].n_rx_queue++;
		}
	}
	return 0;
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK -P"
		" [--config (port,queue,lcore)[,(port,queue,lcore)]]"
		" [--enable-jumbo [--max-pkt-len PKTLEN]]"
		" [--maxflows=<flows>] [--flowttl=<ttl>[(s|ms)]]\n"
		"  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
		"  -P: enable promiscuous mode\n"
		"  --config (port,queue,lcore): rx queues configuration\n"
		"  --no-numa: optional, disable numa awareness\n"
		"  --enable-jumbo: enable jumbo frames; the maximum packet "
		"length is PKTLEN in decimal (64-9600)\n"
		"  --maxflows=<flows>: optional, maximum number of flows "
		"supported\n"
		"  --flowttl=<ttl>[(s|ms)]: optional, maximum TTL for each "
		"flow\n",
		prgname);
}

static int
parse_flow_num(const char *str, uint32_t min, uint32_t max, uint32_t *val)
{
	char *end;
	uint64_t v;

	/* parse decimal string */
	errno = 0;
	v = strtoul(str, &end, 10);
	if (errno != 0 || *end != '\0')
		return (-EINVAL);

	if (v < min || v > max)
		return (-EINVAL);

	*val = (uint32_t)v;
	return (0);
}

static int
parse_flow_ttl(const char *str, uint32_t min,
	uint32_t max, uint32_t *val)
{
	char *end;
	uint64_t v;

	static const char frmt_sec[] = "s";
	static const char frmt_msec[] = "ms";

	/* parse decimal string */
	errno = 0;
	v = strtoul(str, &end, 10);
	if (errno != 0)
		return (-EINVAL);

	if (*end != '\0') {
		if (strncmp(frmt_sec, end, sizeof(frmt_sec)) == 0)
			v *= MS_PER_S;
		else if (strncmp(frmt_msec, end, sizeof(frmt_msec)) != 0)
			return (-EINVAL);
	}

	if (v < min || v > max)
		return (-EINVAL);

	*val = (uint32_t)v;
	return (0);
}


static int parse_max_pkt_len(const char *pktlen)
{
	char *end = NULL;
	unsigned long len;

	/* parse decimal string */
	len = strtoul(pktlen, &end, 10);
	if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (len == 0)
		return -1;

	return len;
}

static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}

static int
parse_config(const char *q_arg)
{
	char s[256];
	const char *p, *p0 = q_arg;
	char *end;
	enum fieldnames {
		FLD_PORT = 0,
		FLD_QUEUE,
		FLD_LCORE,
		_NUM_FLD
	};
	unsigned long int_fld[_NUM_FLD];
	char *str_fld[_NUM_FLD];
	int i;
	unsigned size;

	nb_lcore_params = 0;

	while ((p = strchr(p0, '(')) != NULL) {
		++p;
		if ((p0 = strchr(p, ')')) == NULL)
			return -1;

		size = p0 - p;
		if (size >= sizeof(s))
			return -1;

		rte_snprintf(s, sizeof(s), "%.*s", size, p);
		if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
				_NUM_FLD)
			return -1;
		for (i = 0; i < _NUM_FLD; i++) {
			errno = 0;
			int_fld[i] = strtoul(str_fld[i], &end, 0);
			if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
				return -1;
		}
		if (nb_lcore_params >= MAX_LCORE_PARAMS) {
			printf("exceeded max number of lcore params: %hu\n",
				nb_lcore_params);
			return -1;
		}
		lcore_params_array[nb_lcore_params].port_id =
			(uint8_t)int_fld[FLD_PORT];
		lcore_params_array[nb_lcore_params].queue_id =
			(uint8_t)int_fld[FLD_QUEUE];
		lcore_params_array[nb_lcore_params].lcore_id =
			(uint8_t)int_fld[FLD_LCORE];
		++nb_lcore_params;
	}
	lcore_params = lcore_params_array;
	return 0;
}

/* Parse the argument given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
	int opt, ret;
	char **argvopt;
	int option_index;
	char *prgname = argv[0];
	static struct option lgopts[] = {
		{"config", 1, 0, 0},
		{"no-numa", 0, 0, 0},
		{"enable-jumbo", 0, 0, 0},
		{"maxflows", 1, 0, 0},
		{"flowttl", 1, 0, 0},
		{NULL, 0, 0, 0}
	};

	argvopt = argv;

	while ((opt = getopt_long(argc, argvopt, "p:P",
			lgopts, &option_index)) != EOF) {

		switch (opt) {
		/* portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				printf("invalid portmask\n");
				print_usage(prgname);
				return -1;
			}
			break;
		case 'P':
			printf("Promiscuous mode selected\n");
			promiscuous_on = 1;
			break;

		/* long options */
		case 0:
			if (!strncmp(lgopts[option_index].name, "config", 6)) {
				ret = parse_config(optarg);
				if (ret) {
					printf("invalid config\n");
					print_usage(prgname);
					return -1;
				}
			}

			if (!strncmp(lgopts[option_index].name, "no-numa", 7)) {
				printf("numa is disabled\n");
				numa_on = 0;
			}

			if (!strncmp(lgopts[option_index].name,
					"maxflows", 8)) {
				if ((ret = parse_flow_num(optarg, MIN_FLOW_NUM,
						MAX_FLOW_NUM,
						&max_flow_num)) != 0) {
					printf("invalid value: \"%s\" for "
						"parameter %s\n",
						optarg,
						lgopts[option_index].name);
					print_usage(prgname);
					return (ret);
				}
			}

			if (!strncmp(lgopts[option_index].name, "flowttl", 7)) {
				if ((ret = parse_flow_ttl(optarg, MIN_FLOW_TTL,
						MAX_FLOW_TTL,
						&max_flow_ttl)) != 0) {
					printf("invalid value: \"%s\" for "
						"parameter %s\n",
						optarg,
						lgopts[option_index].name);
					print_usage(prgname);
					return (ret);
				}
			}

			if (!strncmp(lgopts[option_index].name, "enable-jumbo", 12)) {
				struct option lenopts = {"max-pkt-len",
					required_argument, 0, 0};

				printf("jumbo frame is enabled\n");
				port_conf.rxmode.jumbo_frame = 1;

				/*
				 * if no max-pkt-len set, use the default
				 * value ETHER_MAX_LEN
				 */
				if (0 == getopt_long(argc, argvopt, "",
						&lenopts, &option_index)) {
					ret = parse_max_pkt_len(optarg);
					if ((ret < 64) ||
							(ret > MAX_JUMBO_PKT_LEN)) {
						printf("invalid packet length\n");
						print_usage(prgname);
						return -1;
					}
					port_conf.rxmode.max_rx_pkt_len = ret;
				}
				printf("set jumbo frame max packet length to %u\n",
					(unsigned int)port_conf.rxmode.max_rx_pkt_len);
			}

			break;

		default:
			print_usage(prgname);
			return -1;
		}
	}

	if (optind >= 0)
		argv[optind-1] = prgname;

	ret = optind-1;
	optind = 0; /* reset getopt lib */
	return ret;
}

static void
print_ethaddr(const char *name, const struct ether_addr *eth_addr)
{
	printf("%s%02X:%02X:%02X:%02X:%02X:%02X", name,
		eth_addr->addr_bytes[0],
		eth_addr->addr_bytes[1],
		eth_addr->addr_bytes[2],
		eth_addr->addr_bytes[3],
		eth_addr->addr_bytes[4],
		eth_addr->addr_bytes[5]);
}

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
static void
setup_hash(int socketid)
{
	struct rte_hash_parameters ipv4_l3fwd_hash_params = {
		.name = NULL,
		.entries = L3FWD_HASH_ENTRIES,
		.bucket_entries = 4,
		.key_len = sizeof(struct ipv4_5tuple),
		.hash_func = DEFAULT_HASH_FUNC,
		.hash_func_init_val = 0,
	};

	struct rte_hash_parameters ipv6_l3fwd_hash_params = {
		.name = NULL,
		.entries = L3FWD_HASH_ENTRIES,
		.bucket_entries = 4,
		.key_len = sizeof(struct ipv6_5tuple),
		.hash_func = DEFAULT_HASH_FUNC,
		.hash_func_init_val = 0,
	};

	unsigned i;
	int ret;
	char s[64];

	/* create ipv4 hash */
	rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
	ipv4_l3fwd_hash_params.name = s;
	ipv4_l3fwd_hash_params.socket_id = socketid;
	ipv4_l3fwd_lookup_struct[socketid] =
		rte_hash_create(&ipv4_l3fwd_hash_params);
	if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
			"socket %d\n", socketid);

	/* create ipv6 hash */
	rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
	ipv6_l3fwd_hash_params.name = s;
	ipv6_l3fwd_hash_params.socket_id = socketid;
	ipv6_l3fwd_lookup_struct[socketid] =
		rte_hash_create(&ipv6_l3fwd_hash_params);
	if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
			"socket %d\n", socketid);


	/* populate the ipv4 hash */
	for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
		ret = rte_hash_add_key(ipv4_l3fwd_lookup_struct[socketid],
			(void *)&ipv4_l3fwd_route_array[i].key);
		if (ret < 0) {
			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
				"l3fwd hash on socket %d\n", i, socketid);
		}
		ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out;
		printf("Hash: Adding key\n");
		print_ipv4_key(ipv4_l3fwd_route_array[i].key);
	}

	/* populate the ipv6 hash */
	for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) {
		ret = rte_hash_add_key(ipv6_l3fwd_lookup_struct[socketid],
			(void *)&ipv6_l3fwd_route_array[i].key);
		if (ret < 0) {
			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
				"l3fwd hash on socket %d\n", i, socketid);
		}
		ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out;
		printf("Hash: Adding key\n");
		print_ipv6_key(ipv6_l3fwd_route_array[i].key);
	}
}
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
static void
setup_lpm(int socketid)
{
	struct rte_lpm6_config config;
	unsigned i;
	int ret;
	char s[64];

	/* create the LPM table */
	rte_snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
	ipv4_l3fwd_lookup_struct[socketid] = rte_lpm_create(s, socketid,
		IPV4_L3FWD_LPM_MAX_RULES, 0);
	if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
			" on socket %d\n", socketid);

	/* populate the LPM table */
	for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
		ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
			ipv4_l3fwd_route_array[i].ip,
			ipv4_l3fwd_route_array[i].depth,
			ipv4_l3fwd_route_array[i].if_out);

		if (ret < 0) {
			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
				"l3fwd LPM table on socket %d\n",
				i, socketid);
		}

		printf("LPM: Adding route 0x%08x / %d (%d)\n",
			(unsigned)ipv4_l3fwd_route_array[i].ip,
			ipv4_l3fwd_route_array[i].depth,
			ipv4_l3fwd_route_array[i].if_out);
	}

	/* create the LPM6 table */
	rte_snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid);

	config.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
	config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
	config.flags = 0;
	ipv6_l3fwd_lookup_struct[socketid] = rte_lpm6_create(s, socketid,
		&config);
	if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM6 table"
			" on socket %d\n", socketid);

	/* populate the LPM6 table */
	for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) {
		ret = rte_lpm6_add(ipv6_l3fwd_lookup_struct[socketid],
			ipv6_l3fwd_route_array[i].ip,
			ipv6_l3fwd_route_array[i].depth,
			ipv6_l3fwd_route_array[i].if_out);

		if (ret < 0) {
			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
				"l3fwd LPM6 table on socket %d\n",
				i, socketid);
		}

		printf("LPM: Adding route %s / %d (%d)\n",
			"IPV6",
			ipv6_l3fwd_route_array[i].depth,
			ipv6_l3fwd_route_array[i].if_out);
	}
}
#endif

static int
init_mem(void)
{
	struct lcore_conf *qconf;
	int socketid;
	unsigned lcore_id;

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		if (numa_on)
			socketid = rte_lcore_to_socket_id(lcore_id);
		else
			socketid = 0;

		if (socketid >= NB_SOCKETS) {
			rte_exit(EXIT_FAILURE,
				"Socket %d of lcore %u is out of range %d\n",
				socketid, lcore_id, NB_SOCKETS);
		}

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
		setup_lpm(socketid);
#else
		setup_hash(socketid);
#endif
		qconf = &lcore_conf[lcore_id];
		qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
		qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
	}
	return 0;
}

/* Check the link status of all ports in up to 9s, and print them finally */
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
	uint8_t portid, count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		for (portid = 0; portid < port_num; portid++) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			rte_eth_link_get_nowait(portid, &link);
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					printf("Port %d Link Up - speed %u "
						"Mbps - %s\n", (uint8_t)portid,
						(unsigned)link.link_speed,
						(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
						("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n",
						(uint8_t)portid);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == 0) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}

static void
setup_port_tbl(struct lcore_conf *qconf, uint32_t lcore, int socket,
	uint32_t port)
{
	struct mbuf_table *mtb;
	uint32_t n;
	size_t sz;

	n = RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST);
	sz = sizeof(*mtb) + sizeof(mtb->m_table[0]) * n;

	if ((mtb = rte_zmalloc_socket(__func__, sz, CACHE_LINE_SIZE,
			socket)) == NULL)
		rte_exit(EXIT_FAILURE, "%s() for lcore: %u, port: %u "
			"failed to allocate %zu bytes\n",
			__func__, lcore, port, sz);

	mtb->len = n;
	qconf->tx_mbufs[port] = mtb;
}

static void
setup_queue_tbl(struct lcore_conf *qconf, uint32_t lcore, int socket,
	uint32_t queue)
{
	uint32_t nb_mbuf;
	uint64_t frag_cycles;
	char buf[RTE_MEMPOOL_NAMESIZE];

	frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S *
		max_flow_ttl;

	if ((qconf->frag_tbl[queue] = ipv4_frag_tbl_create(max_flow_num,
			IPV4_FRAG_TBL_BUCKET_ENTRIES, max_flow_num, frag_cycles,
			socket)) == NULL)
		rte_exit(EXIT_FAILURE, "ipv4_frag_tbl_create(%u) on "
			"lcore: %u for queue: %u failed\n",
			max_flow_num, lcore, queue);

	/*
	 * At any given moment up to <max_flow_num * (MAX_FRAG_NUM - 1)>
	 * mbufs could be stored in the fragment table.
	 * Plus, each TX queue can hold up to <max_flow_num> packets.
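	 * Each mbuf data buffer is BUF_SIZE bytes, so a frame larger than
	 * BUF_SIZE is spread over several chained mbufs; the RX/TX descriptor
	 * rings also need mbufs of their own. That is why the computation
	 * below scales by the per-frame mbuf count and then adds the
	 * descriptor ring sizes.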
	 */

	nb_mbuf = 2 * RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST) * MAX_FRAG_NUM;
	nb_mbuf *= (port_conf.rxmode.max_rx_pkt_len + BUF_SIZE - 1) / BUF_SIZE;
	nb_mbuf += RTE_TEST_RX_DESC_DEFAULT + RTE_TEST_TX_DESC_DEFAULT;

	nb_mbuf = RTE_MAX(nb_mbuf, (uint32_t)DEF_MBUF_NUM);

	rte_snprintf(buf, sizeof(buf), "mbuf_pool_%u_%u", lcore, queue);

	if ((qconf->pool[queue] = rte_mempool_create(buf, nb_mbuf, MBUF_SIZE, 0,
			sizeof(struct rte_pktmbuf_pool_private),
			rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL,
			socket, MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET)) == NULL)
		rte_exit(EXIT_FAILURE, "mempool_create(%s) failed", buf);
}

static void
queue_dump_stat(void)
{
	uint32_t i, lcore;
	const struct lcore_conf *qconf;

	for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
		if (rte_lcore_is_enabled(lcore) == 0)
			continue;

		qconf = lcore_conf + lcore;
		for (i = 0; i < qconf->n_rx_queue; i++) {

			fprintf(stdout, " -- lcoreid=%u portid=%hhu "
				"rxqueueid=%hhu frag tbl stat:\n",
				lcore, qconf->rx_queue_list[i].port_id,
				qconf->rx_queue_list[i].queue_id);
			ipv4_frag_tbl_dump_stat(stdout, qconf->frag_tbl[i]);
			fprintf(stdout, "TX bursts:\t%" PRIu64 "\n"
				"TX packets queued:\t%" PRIu64 "\n"
				"TX packets dropped:\t%" PRIu64 "\n"
				"TX packets sent:\t%" PRIu64 "\n",
				qconf->tx_stat.call,
				qconf->tx_stat.queue,
				qconf->tx_stat.drop,
				qconf->tx_stat.send);
		}
	}
}

static void
signal_handler(int signum)
{
	queue_dump_stat();
	if (signum != SIGUSR1)
		rte_exit(0, "received signal: %d, exiting\n", signum);
}

int
MAIN(int argc, char **argv)
{
	struct lcore_conf *qconf;
	int ret;
	unsigned nb_ports;
	uint16_t queueid;
	unsigned lcore_id;
	uint32_t n_tx_queue, nb_lcores;
	uint8_t portid, nb_rx_queue, queue, socketid;

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
	argc -= ret;
	argv += ret;

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");

	if (check_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");

	ret = init_lcore_rx_queues();
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");


	/* init driver(s) */
	if (rte_pmd_init_all() < 0)
		rte_exit(EXIT_FAILURE, "Cannot init pmd\n");

	if (rte_eal_pci_probe() < 0)
		rte_exit(EXIT_FAILURE, "Cannot probe PCI\n");

	nb_ports = rte_eth_dev_count();
	if (nb_ports > MAX_PORTS)
		nb_ports = MAX_PORTS;

	if (check_port_config(nb_ports) < 0)
		rte_exit(EXIT_FAILURE, "check_port_config failed\n");

	nb_lcores = rte_lcore_count();

	/* initialize all ports */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}

		/* init port */
		printf("Initializing port %d ... ", portid);
		fflush(stdout);

		nb_rx_queue = get_port_n_rx_queues(portid);
		n_tx_queue = nb_lcores;
		if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
			n_tx_queue = MAX_TX_QUEUE_PER_PORT;
		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
			nb_rx_queue, (unsigned)n_tx_queue);
		ret = rte_eth_dev_configure(portid, nb_rx_queue,
			(uint16_t)n_tx_queue, &port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
				ret, portid);

		rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf(", ");

		/* init memory */
		ret = init_mem();
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "init_mem failed\n");

		/* init one TX queue per couple (lcore,port) */
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			if (numa_on)
				socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
			fflush(stdout);
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
				socketid, &tx_conf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
					"port=%d\n", ret, portid);

			qconf = &lcore_conf[lcore_id];
			qconf->tx_queue_id[portid] = queueid;
			setup_port_tbl(qconf, lcore_id, socketid, portid);
			queueid++;
		}
		printf("\n");
	}

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;
		qconf = &lcore_conf[lcore_id];
		printf("\nInitializing rx queues on lcore %u ... ", lcore_id);
		fflush(stdout);
		/* init RX queues */
		for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
			portid = qconf->rx_queue_list[queue].port_id;
			queueid = qconf->rx_queue_list[queue].queue_id;

			if (numa_on)
				socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("rxq=%d,%d,%d ", portid, queueid, socketid);
			fflush(stdout);

			setup_queue_tbl(qconf, lcore_id, socketid, queue);

			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
				socketid, &rx_conf, qconf->pool[queue]);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_rx_queue_setup: err=%d, "
					"port=%d\n", ret, portid);
		}
	}

	printf("\n");

	/* start ports */
	for (portid = 0; portid < nb_ports; portid++) {
		if ((enabled_port_mask & (1 << portid)) == 0) {
			continue;
		}
		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
				ret, portid);

		/*
		 * If enabled, put device in promiscuous mode.
		 * This allows IO forwarding mode to forward packets
		 * to itself through 2 cross-connected ports of the
		 * target machine.
		 */
		if (promiscuous_on)
			rte_eth_promiscuous_enable(portid);
	}

	check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);

	signal(SIGUSR1, signal_handler);
	signal(SIGTERM, signal_handler);
	signal(SIGINT, signal_handler);

	/* launch per-lcore init on every lcore */
	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}