1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdint.h> 8 #include <inttypes.h> 9 #include <sys/types.h> 10 #include <string.h> 11 #include <sys/queue.h> 12 #include <stdarg.h> 13 #include <errno.h> 14 #include <getopt.h> 15 #include <signal.h> 16 #include <sys/param.h> 17 18 #include <rte_common.h> 19 #include <rte_byteorder.h> 20 #include <rte_log.h> 21 #include <rte_memory.h> 22 #include <rte_memcpy.h> 23 #include <rte_eal.h> 24 #include <rte_launch.h> 25 #include <rte_atomic.h> 26 #include <rte_cycles.h> 27 #include <rte_prefetch.h> 28 #include <rte_lcore.h> 29 #include <rte_per_lcore.h> 30 #include <rte_branch_prediction.h> 31 #include <rte_interrupts.h> 32 #include <rte_random.h> 33 #include <rte_debug.h> 34 #include <rte_ether.h> 35 #include <rte_ethdev.h> 36 #include <rte_mempool.h> 37 #include <rte_mbuf.h> 38 #include <rte_malloc.h> 39 #include <rte_ip.h> 40 #include <rte_tcp.h> 41 #include <rte_udp.h> 42 #include <rte_string_fns.h> 43 #include <rte_lpm.h> 44 #include <rte_lpm6.h> 45 46 #include <rte_ip_frag.h> 47 48 #define MAX_PKT_BURST 32 49 50 51 #define RTE_LOGTYPE_IP_RSMBL RTE_LOGTYPE_USER1 52 53 #define MAX_JUMBO_PKT_LEN 9600 54 55 #define BUF_SIZE RTE_MBUF_DEFAULT_DATAROOM 56 #define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE 57 58 #define NB_MBUF 8192 59 #define MEMPOOL_CACHE_SIZE 256 60 61 /* allow max jumbo frame 9.5 KB */ 62 #define JUMBO_FRAME_MAX_SIZE 0x2600 63 64 #define MAX_FLOW_NUM UINT16_MAX 65 #define MIN_FLOW_NUM 1 66 #define DEF_FLOW_NUM 0x1000 67 68 /* TTL numbers are in ms. */ 69 #define MAX_FLOW_TTL (3600 * MS_PER_S) 70 #define MIN_FLOW_TTL 1 71 #define DEF_FLOW_TTL MS_PER_S 72 73 #define MAX_FRAG_NUM RTE_LIBRTE_IP_FRAG_MAX_FRAG 74 75 /* Should be power of two. */ 76 #define IP_FRAG_TBL_BUCKET_ENTRIES 16 77 78 static uint32_t max_flow_num = DEF_FLOW_NUM; 79 static uint32_t max_flow_ttl = DEF_FLOW_TTL; 80 81 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 82 83 #define NB_SOCKETS 8 84 85 /* Configure how many packets ahead to prefetch, when reading packets */ 86 #define PREFETCH_OFFSET 3 87 88 /* 89 * Configurable number of RX/TX ring descriptors 90 */ 91 #define RTE_TEST_RX_DESC_DEFAULT 1024 92 #define RTE_TEST_TX_DESC_DEFAULT 1024 93 94 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 95 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 96 97 /* ethernet addresses of ports */ 98 static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 99 100 #ifndef IPv4_BYTES 101 #define IPv4_BYTES_FMT "%" PRIu8 ".%" PRIu8 ".%" PRIu8 ".%" PRIu8 102 #define IPv4_BYTES(addr) \ 103 (uint8_t) (((addr) >> 24) & 0xFF),\ 104 (uint8_t) (((addr) >> 16) & 0xFF),\ 105 (uint8_t) (((addr) >> 8) & 0xFF),\ 106 (uint8_t) ((addr) & 0xFF) 107 #endif 108 109 #ifndef IPv6_BYTES 110 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ 111 "%02x%02x:%02x%02x:%02x%02x:%02x%02x" 112 #define IPv6_BYTES(addr) \ 113 addr[0], addr[1], addr[2], addr[3], \ 114 addr[4], addr[5], addr[6], addr[7], \ 115 addr[8], addr[9], addr[10], addr[11],\ 116 addr[12], addr[13],addr[14], addr[15] 117 #endif 118 119 #define IPV6_ADDR_LEN 16 120 121 /* mask of enabled ports */ 122 static uint32_t enabled_port_mask = 0; 123 124 static int rx_queue_per_lcore = 1; 125 126 struct mbuf_table { 127 uint32_t len; 128 uint32_t head; 129 uint32_t tail; 130 struct rte_mbuf *m_table[0]; 131 }; 132 133 struct rx_queue { 134 struct rte_ip_frag_tbl *frag_tbl; 135 struct rte_mempool *pool; 136 struct rte_lpm *lpm; 137 struct rte_lpm6 *lpm6; 138 uint16_t portid; 139 }; 140 141 struct tx_lcore_stat { 142 uint64_t call; 143 uint64_t drop; 144 uint64_t queue; 145 uint64_t send; 146 }; 147 148 #define MAX_RX_QUEUE_PER_LCORE 16 149 #define MAX_TX_QUEUE_PER_PORT 16 150 #define MAX_RX_QUEUE_PER_PORT 128 151 152 struct lcore_queue_conf { 153 uint16_t n_rx_queue; 154 struct rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; 155 uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; 156 struct rte_ip_frag_death_row death_row; 157 struct mbuf_table *tx_mbufs[RTE_MAX_ETHPORTS]; 158 struct tx_lcore_stat tx_stat; 159 } __rte_cache_aligned; 160 static struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; 161 162 static struct rte_eth_conf port_conf = { 163 .rxmode = { 164 .mq_mode = ETH_MQ_RX_RSS, 165 .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE, 166 .split_hdr_size = 0, 167 .offloads = (DEV_RX_OFFLOAD_CHECKSUM | 168 DEV_RX_OFFLOAD_JUMBO_FRAME), 169 }, 170 .rx_adv_conf = { 171 .rss_conf = { 172 .rss_key = NULL, 173 .rss_hf = ETH_RSS_IP, 174 }, 175 }, 176 .txmode = { 177 .mq_mode = ETH_MQ_TX_NONE, 178 .offloads = (DEV_TX_OFFLOAD_IPV4_CKSUM | 179 DEV_TX_OFFLOAD_MULTI_SEGS), 180 }, 181 }; 182 183 /* 184 * IPv4 forwarding table 185 */ 186 struct l3fwd_ipv4_route { 187 uint32_t ip; 188 uint8_t depth; 189 uint8_t if_out; 190 }; 191 192 /* Default l3fwd_ipv4_route_array table. 8< */ 193 struct l3fwd_ipv4_route l3fwd_ipv4_route_array[] = { 194 {RTE_IPV4(100,10,0,0), 16, 0}, 195 {RTE_IPV4(100,20,0,0), 16, 1}, 196 {RTE_IPV4(100,30,0,0), 16, 2}, 197 {RTE_IPV4(100,40,0,0), 16, 3}, 198 {RTE_IPV4(100,50,0,0), 16, 4}, 199 {RTE_IPV4(100,60,0,0), 16, 5}, 200 {RTE_IPV4(100,70,0,0), 16, 6}, 201 {RTE_IPV4(100,80,0,0), 16, 7}, 202 }; 203 /* >8 End of default l3fwd_ipv4_route_array table. */ 204 205 /* 206 * IPv6 forwarding table 207 */ 208 209 struct l3fwd_ipv6_route { 210 uint8_t ip[IPV6_ADDR_LEN]; 211 uint8_t depth; 212 uint8_t if_out; 213 }; 214 215 /* Default l3fwd_ipv6_route_array table. 8< */ 216 static struct l3fwd_ipv6_route l3fwd_ipv6_route_array[] = { 217 {{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0}, 218 {{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1}, 219 {{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2}, 220 {{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3}, 221 {{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4}, 222 {{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5}, 223 {{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6}, 224 {{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7}, 225 }; 226 /* >8 End of default l3fwd_ipv6_route_array table. */ 227 228 #define LPM_MAX_RULES 1024 229 #define LPM6_MAX_RULES 1024 230 #define LPM6_NUMBER_TBL8S (1 << 16) 231 232 struct rte_lpm6_config lpm6_config = { 233 .max_rules = LPM6_MAX_RULES, 234 .number_tbl8s = LPM6_NUMBER_TBL8S, 235 .flags = 0 236 }; 237 238 static struct rte_lpm *socket_lpm[RTE_MAX_NUMA_NODES]; 239 static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES]; 240 241 #ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT 242 #define TX_LCORE_STAT_UPDATE(s, f, v) ((s)->f += (v)) 243 #else 244 #define TX_LCORE_STAT_UPDATE(s, f, v) do {} while (0) 245 #endif /* RTE_LIBRTE_IP_FRAG_TBL_STAT */ 246 247 /* 248 * If number of queued packets reached given threahold, then 249 * send burst of packets on an output interface. 250 */ 251 static inline uint32_t 252 send_burst(struct lcore_queue_conf *qconf, uint32_t thresh, uint16_t port) 253 { 254 uint32_t fill, len, k, n; 255 struct mbuf_table *txmb; 256 257 txmb = qconf->tx_mbufs[port]; 258 len = txmb->len; 259 260 if ((int32_t)(fill = txmb->head - txmb->tail) < 0) 261 fill += len; 262 263 if (fill >= thresh) { 264 n = RTE_MIN(len - txmb->tail, fill); 265 266 k = rte_eth_tx_burst(port, qconf->tx_queue_id[port], 267 txmb->m_table + txmb->tail, (uint16_t)n); 268 269 TX_LCORE_STAT_UPDATE(&qconf->tx_stat, call, 1); 270 TX_LCORE_STAT_UPDATE(&qconf->tx_stat, send, k); 271 272 fill -= k; 273 if ((txmb->tail += k) == len) 274 txmb->tail = 0; 275 } 276 277 return fill; 278 } 279 280 /* Enqueue a single packet, and send burst if queue is filled */ 281 static inline int 282 send_single_packet(struct rte_mbuf *m, uint16_t port) 283 { 284 uint32_t fill, lcore_id, len; 285 struct lcore_queue_conf *qconf; 286 struct mbuf_table *txmb; 287 288 lcore_id = rte_lcore_id(); 289 qconf = &lcore_queue_conf[lcore_id]; 290 291 txmb = qconf->tx_mbufs[port]; 292 len = txmb->len; 293 294 fill = send_burst(qconf, MAX_PKT_BURST, port); 295 296 if (fill == len - 1) { 297 TX_LCORE_STAT_UPDATE(&qconf->tx_stat, drop, 1); 298 rte_pktmbuf_free(txmb->m_table[txmb->tail]); 299 if (++txmb->tail == len) 300 txmb->tail = 0; 301 } 302 303 TX_LCORE_STAT_UPDATE(&qconf->tx_stat, queue, 1); 304 txmb->m_table[txmb->head] = m; 305 if(++txmb->head == len) 306 txmb->head = 0; 307 308 return 0; 309 } 310 311 static inline void 312 reassemble(struct rte_mbuf *m, uint16_t portid, uint32_t queue, 313 struct lcore_queue_conf *qconf, uint64_t tms) 314 { 315 struct rte_ether_hdr *eth_hdr; 316 struct rte_ip_frag_tbl *tbl; 317 struct rte_ip_frag_death_row *dr; 318 struct rx_queue *rxq; 319 void *d_addr_bytes; 320 uint32_t next_hop; 321 uint16_t dst_port; 322 323 rxq = &qconf->rx_queue_list[queue]; 324 325 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 326 327 dst_port = portid; 328 329 /* if packet is IPv4 */ 330 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 331 struct rte_ipv4_hdr *ip_hdr; 332 uint32_t ip_dst; 333 334 ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); 335 336 /* if it is a fragmented packet, then try to reassemble. */ 337 if (rte_ipv4_frag_pkt_is_fragmented(ip_hdr)) { 338 struct rte_mbuf *mo; 339 340 tbl = rxq->frag_tbl; 341 dr = &qconf->death_row; 342 343 /* prepare mbuf: setup l2_len/l3_len. */ 344 m->l2_len = sizeof(*eth_hdr); 345 m->l3_len = sizeof(*ip_hdr); 346 347 /* process this fragment. */ 348 mo = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr); 349 if (mo == NULL) 350 /* no packet to send out. */ 351 return; 352 353 /* we have our packet reassembled. */ 354 if (mo != m) { 355 m = mo; 356 eth_hdr = rte_pktmbuf_mtod(m, 357 struct rte_ether_hdr *); 358 ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1); 359 } 360 361 /* update offloading flags */ 362 m->ol_flags |= (PKT_TX_IPV4 | PKT_TX_IP_CKSUM); 363 } 364 ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr); 365 366 /* Find destination port */ 367 if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop) == 0 && 368 (enabled_port_mask & 1 << next_hop) != 0) { 369 dst_port = next_hop; 370 } 371 372 eth_hdr->ether_type = rte_be_to_cpu_16(RTE_ETHER_TYPE_IPV4); 373 } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { 374 /* if packet is IPv6 */ 375 struct ipv6_extension_fragment *frag_hdr; 376 struct rte_ipv6_hdr *ip_hdr; 377 378 ip_hdr = (struct rte_ipv6_hdr *)(eth_hdr + 1); 379 380 frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(ip_hdr); 381 382 if (frag_hdr != NULL) { 383 struct rte_mbuf *mo; 384 385 tbl = rxq->frag_tbl; 386 dr = &qconf->death_row; 387 388 /* prepare mbuf: setup l2_len/l3_len. */ 389 m->l2_len = sizeof(*eth_hdr); 390 m->l3_len = sizeof(*ip_hdr) + sizeof(*frag_hdr); 391 392 mo = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr, frag_hdr); 393 if (mo == NULL) 394 return; 395 396 if (mo != m) { 397 m = mo; 398 eth_hdr = rte_pktmbuf_mtod(m, 399 struct rte_ether_hdr *); 400 ip_hdr = (struct rte_ipv6_hdr *)(eth_hdr + 1); 401 } 402 } 403 404 /* Find destination port */ 405 if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr, 406 &next_hop) == 0 && 407 (enabled_port_mask & 1 << next_hop) != 0) { 408 dst_port = next_hop; 409 } 410 411 eth_hdr->ether_type = rte_be_to_cpu_16(RTE_ETHER_TYPE_IPV6); 412 } 413 /* if packet wasn't IPv4 or IPv6, it's forwarded to the port it came from */ 414 415 /* 02:00:00:00:00:xx */ 416 d_addr_bytes = ð_hdr->dst_addr.addr_bytes[0]; 417 *((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)dst_port << 40); 418 419 /* src addr */ 420 rte_ether_addr_copy(&ports_eth_addr[dst_port], ð_hdr->src_addr); 421 422 send_single_packet(m, dst_port); 423 } 424 425 /* main processing loop */ 426 static int 427 main_loop(__rte_unused void *dummy) 428 { 429 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 430 unsigned lcore_id; 431 uint64_t diff_tsc, cur_tsc, prev_tsc; 432 int i, j, nb_rx; 433 uint16_t portid; 434 struct lcore_queue_conf *qconf; 435 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; 436 437 prev_tsc = 0; 438 439 lcore_id = rte_lcore_id(); 440 qconf = &lcore_queue_conf[lcore_id]; 441 442 if (qconf->n_rx_queue == 0) { 443 RTE_LOG(INFO, IP_RSMBL, "lcore %u has nothing to do\n", lcore_id); 444 return 0; 445 } 446 447 RTE_LOG(INFO, IP_RSMBL, "entering main loop on lcore %u\n", lcore_id); 448 449 for (i = 0; i < qconf->n_rx_queue; i++) { 450 451 portid = qconf->rx_queue_list[i].portid; 452 RTE_LOG(INFO, IP_RSMBL, " -- lcoreid=%u portid=%u\n", lcore_id, 453 portid); 454 } 455 456 while (1) { 457 458 cur_tsc = rte_rdtsc(); 459 460 /* 461 * TX burst queue drain 462 */ 463 diff_tsc = cur_tsc - prev_tsc; 464 if (unlikely(diff_tsc > drain_tsc)) { 465 466 /* 467 * This could be optimized (use queueid instead of 468 * portid), but it is not called so often 469 */ 470 for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { 471 if ((enabled_port_mask & (1 << portid)) != 0) 472 send_burst(qconf, 1, portid); 473 } 474 475 prev_tsc = cur_tsc; 476 } 477 478 /* 479 * Read packet from RX queues 480 */ 481 for (i = 0; i < qconf->n_rx_queue; ++i) { 482 483 portid = qconf->rx_queue_list[i].portid; 484 485 nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst, 486 MAX_PKT_BURST); 487 488 /* Prefetch first packets */ 489 for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { 490 rte_prefetch0(rte_pktmbuf_mtod( 491 pkts_burst[j], void *)); 492 } 493 494 /* Prefetch and forward already prefetched packets */ 495 for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { 496 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ 497 j + PREFETCH_OFFSET], void *)); 498 reassemble(pkts_burst[j], portid, 499 i, qconf, cur_tsc); 500 } 501 502 /* Forward remaining prefetched packets */ 503 for (; j < nb_rx; j++) { 504 reassemble(pkts_burst[j], portid, 505 i, qconf, cur_tsc); 506 } 507 508 rte_ip_frag_free_death_row(&qconf->death_row, 509 PREFETCH_OFFSET); 510 } 511 } 512 } 513 514 /* display usage */ 515 static void 516 print_usage(const char *prgname) 517 { 518 printf("%s [EAL options] -- -p PORTMASK [-q NQ]" 519 " [--max-pkt-len PKTLEN]" 520 " [--maxflows=<flows>] [--flowttl=<ttl>[(s|ms)]]\n" 521 " -p PORTMASK: hexadecimal bitmask of ports to configure\n" 522 " -q NQ: number of RX queues per lcore\n" 523 " --maxflows=<flows>: optional, maximum number of flows " 524 "supported\n" 525 " --flowttl=<ttl>[(s|ms)]: optional, maximum TTL for each " 526 "flow\n", 527 prgname); 528 } 529 530 static uint32_t 531 parse_flow_num(const char *str, uint32_t min, uint32_t max, uint32_t *val) 532 { 533 char *end; 534 uint64_t v; 535 536 /* parse decimal string */ 537 errno = 0; 538 v = strtoul(str, &end, 10); 539 if (errno != 0 || *end != '\0') 540 return -EINVAL; 541 542 if (v < min || v > max) 543 return -EINVAL; 544 545 *val = (uint32_t)v; 546 return 0; 547 } 548 549 static int 550 parse_flow_ttl(const char *str, uint32_t min, uint32_t max, uint32_t *val) 551 { 552 char *end; 553 uint64_t v; 554 555 static const char frmt_sec[] = "s"; 556 static const char frmt_msec[] = "ms"; 557 558 /* parse decimal string */ 559 errno = 0; 560 v = strtoul(str, &end, 10); 561 if (errno != 0) 562 return -EINVAL; 563 564 if (*end != '\0') { 565 if (strncmp(frmt_sec, end, sizeof(frmt_sec)) == 0) 566 v *= MS_PER_S; 567 else if (strncmp(frmt_msec, end, sizeof (frmt_msec)) != 0) 568 return -EINVAL; 569 } 570 571 if (v < min || v > max) 572 return -EINVAL; 573 574 *val = (uint32_t)v; 575 return 0; 576 } 577 578 static int 579 parse_portmask(const char *portmask) 580 { 581 char *end = NULL; 582 unsigned long pm; 583 584 /* parse hexadecimal string */ 585 pm = strtoul(portmask, &end, 16); 586 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 587 return 0; 588 589 return pm; 590 } 591 592 static int 593 parse_nqueue(const char *q_arg) 594 { 595 char *end = NULL; 596 unsigned long n; 597 598 printf("%p\n", q_arg); 599 600 /* parse hexadecimal string */ 601 n = strtoul(q_arg, &end, 10); 602 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) 603 return -1; 604 if (n == 0) 605 return -1; 606 if (n >= MAX_RX_QUEUE_PER_LCORE) 607 return -1; 608 609 return n; 610 } 611 612 /* Parse the argument given in the command line of the application */ 613 static int 614 parse_args(int argc, char **argv) 615 { 616 int opt, ret; 617 char **argvopt; 618 int option_index; 619 char *prgname = argv[0]; 620 static struct option lgopts[] = { 621 {"max-pkt-len", 1, 0, 0}, 622 {"maxflows", 1, 0, 0}, 623 {"flowttl", 1, 0, 0}, 624 {NULL, 0, 0, 0} 625 }; 626 627 argvopt = argv; 628 629 while ((opt = getopt_long(argc, argvopt, "p:q:", 630 lgopts, &option_index)) != EOF) { 631 632 switch (opt) { 633 /* portmask */ 634 case 'p': 635 enabled_port_mask = parse_portmask(optarg); 636 if (enabled_port_mask == 0) { 637 printf("invalid portmask\n"); 638 print_usage(prgname); 639 return -1; 640 } 641 break; 642 643 /* nqueue */ 644 case 'q': 645 rx_queue_per_lcore = parse_nqueue(optarg); 646 if (rx_queue_per_lcore < 0) { 647 printf("invalid queue number\n"); 648 print_usage(prgname); 649 return -1; 650 } 651 break; 652 653 /* long options */ 654 case 0: 655 if (!strncmp(lgopts[option_index].name, 656 "maxflows", 8)) { 657 if ((ret = parse_flow_num(optarg, MIN_FLOW_NUM, 658 MAX_FLOW_NUM, 659 &max_flow_num)) != 0) { 660 printf("invalid value: \"%s\" for " 661 "parameter %s\n", 662 optarg, 663 lgopts[option_index].name); 664 print_usage(prgname); 665 return ret; 666 } 667 } 668 669 if (!strncmp(lgopts[option_index].name, "flowttl", 7)) { 670 if ((ret = parse_flow_ttl(optarg, MIN_FLOW_TTL, 671 MAX_FLOW_TTL, 672 &max_flow_ttl)) != 0) { 673 printf("invalid value: \"%s\" for " 674 "parameter %s\n", 675 optarg, 676 lgopts[option_index].name); 677 print_usage(prgname); 678 return ret; 679 } 680 } 681 682 break; 683 684 default: 685 print_usage(prgname); 686 return -1; 687 } 688 } 689 690 if (optind >= 0) 691 argv[optind-1] = prgname; 692 693 ret = optind-1; 694 optind = 1; /* reset getopt lib */ 695 return ret; 696 } 697 698 static void 699 print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr) 700 { 701 char buf[RTE_ETHER_ADDR_FMT_SIZE]; 702 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr); 703 printf("%s%s", name, buf); 704 } 705 706 /* Check the link status of all ports in up to 9s, and print them finally */ 707 static void 708 check_all_ports_link_status(uint32_t port_mask) 709 { 710 #define CHECK_INTERVAL 100 /* 100ms */ 711 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 712 uint16_t portid; 713 uint8_t count, all_ports_up, print_flag = 0; 714 struct rte_eth_link link; 715 int ret; 716 char link_status_text[RTE_ETH_LINK_MAX_STR_LEN]; 717 718 printf("\nChecking link status"); 719 fflush(stdout); 720 for (count = 0; count <= MAX_CHECK_TIME; count++) { 721 all_ports_up = 1; 722 RTE_ETH_FOREACH_DEV(portid) { 723 if ((port_mask & (1 << portid)) == 0) 724 continue; 725 memset(&link, 0, sizeof(link)); 726 ret = rte_eth_link_get_nowait(portid, &link); 727 if (ret < 0) { 728 all_ports_up = 0; 729 if (print_flag == 1) 730 printf("Port %u link get failed: %s\n", 731 portid, rte_strerror(-ret)); 732 continue; 733 } 734 /* print link status if flag set */ 735 if (print_flag == 1) { 736 rte_eth_link_to_str(link_status_text, 737 sizeof(link_status_text), &link); 738 printf("Port %d %s\n", portid, 739 link_status_text); 740 continue; 741 } 742 /* clear all_ports_up flag if any link down */ 743 if (link.link_status == ETH_LINK_DOWN) { 744 all_ports_up = 0; 745 break; 746 } 747 } 748 /* after finally printing all link status, get out */ 749 if (print_flag == 1) 750 break; 751 752 if (all_ports_up == 0) { 753 printf("."); 754 fflush(stdout); 755 rte_delay_ms(CHECK_INTERVAL); 756 } 757 758 /* set the print_flag if all ports up or timeout */ 759 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 760 print_flag = 1; 761 printf("\ndone\n"); 762 } 763 } 764 } 765 766 static int 767 init_routing_table(void) 768 { 769 struct rte_lpm *lpm; 770 struct rte_lpm6 *lpm6; 771 int socket, ret; 772 unsigned i; 773 774 for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { 775 if (socket_lpm[socket]) { 776 lpm = socket_lpm[socket]; 777 /* populate the LPM table */ 778 for (i = 0; i < RTE_DIM(l3fwd_ipv4_route_array); i++) { 779 ret = rte_lpm_add(lpm, 780 l3fwd_ipv4_route_array[i].ip, 781 l3fwd_ipv4_route_array[i].depth, 782 l3fwd_ipv4_route_array[i].if_out); 783 784 if (ret < 0) { 785 RTE_LOG(ERR, IP_RSMBL, "Unable to add entry %i to the l3fwd " 786 "LPM table\n", i); 787 return -1; 788 } 789 790 RTE_LOG(INFO, IP_RSMBL, "Socket %i: adding route " IPv4_BYTES_FMT 791 "/%d (port %d)\n", 792 socket, 793 IPv4_BYTES(l3fwd_ipv4_route_array[i].ip), 794 l3fwd_ipv4_route_array[i].depth, 795 l3fwd_ipv4_route_array[i].if_out); 796 } 797 } 798 799 if (socket_lpm6[socket]) { 800 lpm6 = socket_lpm6[socket]; 801 /* populate the LPM6 table */ 802 for (i = 0; i < RTE_DIM(l3fwd_ipv6_route_array); i++) { 803 ret = rte_lpm6_add(lpm6, 804 l3fwd_ipv6_route_array[i].ip, 805 l3fwd_ipv6_route_array[i].depth, 806 l3fwd_ipv6_route_array[i].if_out); 807 808 if (ret < 0) { 809 RTE_LOG(ERR, IP_RSMBL, "Unable to add entry %i to the l3fwd " 810 "LPM6 table\n", i); 811 return -1; 812 } 813 814 RTE_LOG(INFO, IP_RSMBL, "Socket %i: adding route " IPv6_BYTES_FMT 815 "/%d (port %d)\n", 816 socket, 817 IPv6_BYTES(l3fwd_ipv6_route_array[i].ip), 818 l3fwd_ipv6_route_array[i].depth, 819 l3fwd_ipv6_route_array[i].if_out); 820 } 821 } 822 } 823 return 0; 824 } 825 826 static int 827 setup_port_tbl(struct lcore_queue_conf *qconf, uint32_t lcore, int socket, 828 uint32_t port) 829 { 830 struct mbuf_table *mtb; 831 uint32_t n; 832 size_t sz; 833 834 n = RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST); 835 sz = sizeof (*mtb) + sizeof (mtb->m_table[0]) * n; 836 837 if ((mtb = rte_zmalloc_socket(__func__, sz, RTE_CACHE_LINE_SIZE, 838 socket)) == NULL) { 839 RTE_LOG(ERR, IP_RSMBL, "%s() for lcore: %u, port: %u " 840 "failed to allocate %zu bytes\n", 841 __func__, lcore, port, sz); 842 return -1; 843 } 844 845 mtb->len = n; 846 qconf->tx_mbufs[port] = mtb; 847 848 return 0; 849 } 850 851 static int 852 setup_queue_tbl(struct rx_queue *rxq, uint32_t lcore, uint32_t queue) 853 { 854 int socket; 855 uint32_t nb_mbuf; 856 uint64_t frag_cycles; 857 char buf[RTE_MEMPOOL_NAMESIZE]; 858 859 socket = rte_lcore_to_socket_id(lcore); 860 if (socket == SOCKET_ID_ANY) 861 socket = 0; 862 863 /* Each table entry holds information about packet fragmentation. 8< */ 864 frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S * 865 max_flow_ttl; 866 867 if ((rxq->frag_tbl = rte_ip_frag_table_create(max_flow_num, 868 IP_FRAG_TBL_BUCKET_ENTRIES, max_flow_num, frag_cycles, 869 socket)) == NULL) { 870 RTE_LOG(ERR, IP_RSMBL, "ip_frag_tbl_create(%u) on " 871 "lcore: %u for queue: %u failed\n", 872 max_flow_num, lcore, queue); 873 return -1; 874 } 875 /* >8 End of holding packet fragmentation. */ 876 877 /* 878 * At any given moment up to <max_flow_num * (MAX_FRAG_NUM)> 879 * mbufs could be stored int the fragment table. 880 * Plus, each TX queue can hold up to <max_flow_num> packets. 881 */ 882 883 /* mbufs stored int the gragment table. 8< */ 884 nb_mbuf = RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST) * MAX_FRAG_NUM; 885 nb_mbuf *= (port_conf.rxmode.max_rx_pkt_len + BUF_SIZE - 1) / BUF_SIZE; 886 nb_mbuf *= 2; /* ipv4 and ipv6 */ 887 nb_mbuf += nb_rxd + nb_txd; 888 889 nb_mbuf = RTE_MAX(nb_mbuf, (uint32_t)NB_MBUF); 890 891 snprintf(buf, sizeof(buf), "mbuf_pool_%u_%u", lcore, queue); 892 893 rxq->pool = rte_pktmbuf_pool_create(buf, nb_mbuf, MEMPOOL_CACHE_SIZE, 0, 894 MBUF_DATA_SIZE, socket); 895 if (rxq->pool == NULL) { 896 RTE_LOG(ERR, IP_RSMBL, 897 "rte_pktmbuf_pool_create(%s) failed", buf); 898 return -1; 899 } 900 /* >8 End of mbufs stored int the fragmentation table. */ 901 902 return 0; 903 } 904 905 static int 906 init_mem(void) 907 { 908 char buf[PATH_MAX]; 909 struct rte_lpm *lpm; 910 struct rte_lpm6 *lpm6; 911 struct rte_lpm_config lpm_config; 912 int socket; 913 unsigned lcore_id; 914 915 /* traverse through lcores and initialize structures on each socket */ 916 917 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 918 919 if (rte_lcore_is_enabled(lcore_id) == 0) 920 continue; 921 922 socket = rte_lcore_to_socket_id(lcore_id); 923 924 if (socket == SOCKET_ID_ANY) 925 socket = 0; 926 927 if (socket_lpm[socket] == NULL) { 928 RTE_LOG(INFO, IP_RSMBL, "Creating LPM table on socket %i\n", socket); 929 snprintf(buf, sizeof(buf), "IP_RSMBL_LPM_%i", socket); 930 931 lpm_config.max_rules = LPM_MAX_RULES; 932 lpm_config.number_tbl8s = 256; 933 lpm_config.flags = 0; 934 935 lpm = rte_lpm_create(buf, socket, &lpm_config); 936 if (lpm == NULL) { 937 RTE_LOG(ERR, IP_RSMBL, "Cannot create LPM table\n"); 938 return -1; 939 } 940 socket_lpm[socket] = lpm; 941 } 942 943 if (socket_lpm6[socket] == NULL) { 944 RTE_LOG(INFO, IP_RSMBL, "Creating LPM6 table on socket %i\n", socket); 945 snprintf(buf, sizeof(buf), "IP_RSMBL_LPM_%i", socket); 946 947 lpm6 = rte_lpm6_create(buf, socket, &lpm6_config); 948 if (lpm6 == NULL) { 949 RTE_LOG(ERR, IP_RSMBL, "Cannot create LPM table\n"); 950 return -1; 951 } 952 socket_lpm6[socket] = lpm6; 953 } 954 } 955 956 return 0; 957 } 958 959 static void 960 queue_dump_stat(void) 961 { 962 uint32_t i, lcore; 963 const struct lcore_queue_conf *qconf; 964 965 for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) { 966 if (rte_lcore_is_enabled(lcore) == 0) 967 continue; 968 969 qconf = &lcore_queue_conf[lcore]; 970 for (i = 0; i < qconf->n_rx_queue; i++) { 971 972 fprintf(stdout, " -- lcoreid=%u portid=%u " 973 "frag tbl stat:\n", 974 lcore, qconf->rx_queue_list[i].portid); 975 rte_ip_frag_table_statistics_dump(stdout, 976 qconf->rx_queue_list[i].frag_tbl); 977 fprintf(stdout, "TX bursts:\t%" PRIu64 "\n" 978 "TX packets _queued:\t%" PRIu64 "\n" 979 "TX packets dropped:\t%" PRIu64 "\n" 980 "TX packets send:\t%" PRIu64 "\n", 981 qconf->tx_stat.call, 982 qconf->tx_stat.queue, 983 qconf->tx_stat.drop, 984 qconf->tx_stat.send); 985 } 986 } 987 } 988 989 static void 990 signal_handler(int signum) 991 { 992 queue_dump_stat(); 993 if (signum != SIGUSR1) 994 rte_exit(0, "received signal: %d, exiting\n", signum); 995 } 996 997 int 998 main(int argc, char **argv) 999 { 1000 struct lcore_queue_conf *qconf; 1001 struct rte_eth_dev_info dev_info; 1002 struct rte_eth_txconf *txconf; 1003 struct rx_queue *rxq; 1004 int ret, socket; 1005 unsigned nb_ports; 1006 uint16_t queueid; 1007 unsigned lcore_id = 0, rx_lcore_id = 0; 1008 uint32_t n_tx_queue, nb_lcores; 1009 uint16_t portid; 1010 1011 /* init EAL */ 1012 ret = rte_eal_init(argc, argv); 1013 if (ret < 0) 1014 rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n"); 1015 argc -= ret; 1016 argv += ret; 1017 1018 /* parse application arguments (after the EAL ones) */ 1019 ret = parse_args(argc, argv); 1020 if (ret < 0) 1021 rte_exit(EXIT_FAILURE, "Invalid IP reassembly parameters\n"); 1022 1023 nb_ports = rte_eth_dev_count_avail(); 1024 if (nb_ports == 0) 1025 rte_exit(EXIT_FAILURE, "No ports found!\n"); 1026 1027 nb_lcores = rte_lcore_count(); 1028 1029 /* initialize structures (mempools, lpm etc.) */ 1030 if (init_mem() < 0) 1031 rte_panic("Cannot initialize memory structures!\n"); 1032 1033 /* check if portmask has non-existent ports */ 1034 if (enabled_port_mask & ~(RTE_LEN2MASK(nb_ports, unsigned))) 1035 rte_exit(EXIT_FAILURE, "Non-existent ports in portmask!\n"); 1036 1037 /* initialize all ports */ 1038 RTE_ETH_FOREACH_DEV(portid) { 1039 struct rte_eth_rxconf rxq_conf; 1040 struct rte_eth_conf local_port_conf = port_conf; 1041 1042 /* skip ports that are not enabled */ 1043 if ((enabled_port_mask & (1 << portid)) == 0) { 1044 printf("\nSkipping disabled port %d\n", portid); 1045 continue; 1046 } 1047 1048 qconf = &lcore_queue_conf[rx_lcore_id]; 1049 1050 /* limit the frame size to the maximum supported by NIC */ 1051 ret = rte_eth_dev_info_get(portid, &dev_info); 1052 if (ret != 0) 1053 rte_exit(EXIT_FAILURE, 1054 "Error during getting device (port %u) info: %s\n", 1055 portid, strerror(-ret)); 1056 1057 local_port_conf.rxmode.max_rx_pkt_len = RTE_MIN( 1058 dev_info.max_rx_pktlen, 1059 local_port_conf.rxmode.max_rx_pkt_len); 1060 1061 /* get the lcore_id for this port */ 1062 while (rte_lcore_is_enabled(rx_lcore_id) == 0 || 1063 qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) { 1064 1065 rx_lcore_id++; 1066 if (rx_lcore_id >= RTE_MAX_LCORE) 1067 rte_exit(EXIT_FAILURE, "Not enough cores\n"); 1068 1069 qconf = &lcore_queue_conf[rx_lcore_id]; 1070 } 1071 1072 socket = rte_lcore_to_socket_id(portid); 1073 if (socket == SOCKET_ID_ANY) 1074 socket = 0; 1075 1076 queueid = qconf->n_rx_queue; 1077 rxq = &qconf->rx_queue_list[queueid]; 1078 rxq->portid = portid; 1079 rxq->lpm = socket_lpm[socket]; 1080 rxq->lpm6 = socket_lpm6[socket]; 1081 1082 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, 1083 &nb_txd); 1084 if (ret < 0) 1085 rte_exit(EXIT_FAILURE, 1086 "Cannot adjust number of descriptors: err=%d, port=%d\n", 1087 ret, portid); 1088 1089 if (setup_queue_tbl(rxq, rx_lcore_id, queueid) < 0) 1090 rte_exit(EXIT_FAILURE, "Failed to set up queue table\n"); 1091 qconf->n_rx_queue++; 1092 1093 /* init port */ 1094 printf("Initializing port %d ... ", portid ); 1095 fflush(stdout); 1096 1097 n_tx_queue = nb_lcores; 1098 if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) 1099 n_tx_queue = MAX_TX_QUEUE_PER_PORT; 1100 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 1101 local_port_conf.txmode.offloads |= 1102 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 1103 1104 local_port_conf.rx_adv_conf.rss_conf.rss_hf &= 1105 dev_info.flow_type_rss_offloads; 1106 if (local_port_conf.rx_adv_conf.rss_conf.rss_hf != 1107 port_conf.rx_adv_conf.rss_conf.rss_hf) { 1108 printf("Port %u modified RSS hash function based on hardware support," 1109 "requested:%#"PRIx64" configured:%#"PRIx64"\n", 1110 portid, 1111 port_conf.rx_adv_conf.rss_conf.rss_hf, 1112 local_port_conf.rx_adv_conf.rss_conf.rss_hf); 1113 } 1114 1115 ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue, 1116 &local_port_conf); 1117 if (ret < 0) { 1118 printf("\n"); 1119 rte_exit(EXIT_FAILURE, "Cannot configure device: " 1120 "err=%d, port=%d\n", 1121 ret, portid); 1122 } 1123 1124 /* init one RX queue */ 1125 rxq_conf = dev_info.default_rxconf; 1126 rxq_conf.offloads = local_port_conf.rxmode.offloads; 1127 ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, 1128 socket, &rxq_conf, 1129 rxq->pool); 1130 if (ret < 0) { 1131 printf("\n"); 1132 rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: " 1133 "err=%d, port=%d\n", 1134 ret, portid); 1135 } 1136 1137 ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 1138 if (ret < 0) { 1139 printf("\n"); 1140 rte_exit(EXIT_FAILURE, 1141 "rte_eth_macaddr_get: err=%d, port=%d\n", 1142 ret, portid); 1143 } 1144 1145 print_ethaddr(" Address:", &ports_eth_addr[portid]); 1146 printf("\n"); 1147 1148 /* init one TX queue per couple (lcore,port) */ 1149 queueid = 0; 1150 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 1151 if (rte_lcore_is_enabled(lcore_id) == 0) 1152 continue; 1153 1154 socket = (int) rte_lcore_to_socket_id(lcore_id); 1155 1156 printf("txq=%u,%d,%d ", lcore_id, queueid, socket); 1157 fflush(stdout); 1158 1159 txconf = &dev_info.default_txconf; 1160 txconf->offloads = local_port_conf.txmode.offloads; 1161 1162 ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, 1163 socket, txconf); 1164 if (ret < 0) 1165 rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, " 1166 "port=%d\n", ret, portid); 1167 1168 qconf = &lcore_queue_conf[lcore_id]; 1169 qconf->tx_queue_id[portid] = queueid; 1170 setup_port_tbl(qconf, lcore_id, socket, portid); 1171 queueid++; 1172 } 1173 printf("\n"); 1174 } 1175 1176 printf("\n"); 1177 1178 /* start ports */ 1179 RTE_ETH_FOREACH_DEV(portid) { 1180 if ((enabled_port_mask & (1 << portid)) == 0) { 1181 continue; 1182 } 1183 /* Start device */ 1184 ret = rte_eth_dev_start(portid); 1185 if (ret < 0) 1186 rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n", 1187 ret, portid); 1188 1189 ret = rte_eth_promiscuous_enable(portid); 1190 if (ret != 0) 1191 rte_exit(EXIT_FAILURE, 1192 "rte_eth_promiscuous_enable: err=%s, port=%d\n", 1193 rte_strerror(-ret), portid); 1194 } 1195 1196 if (init_routing_table() < 0) 1197 rte_exit(EXIT_FAILURE, "Cannot init routing table\n"); 1198 1199 check_all_ports_link_status(enabled_port_mask); 1200 1201 signal(SIGUSR1, signal_handler); 1202 signal(SIGTERM, signal_handler); 1203 signal(SIGINT, signal_handler); 1204 1205 /* launch per-lcore init on every lcore */ 1206 rte_eal_mp_remote_launch(main_loop, NULL, CALL_MAIN); 1207 RTE_LCORE_FOREACH_WORKER(lcore_id) { 1208 if (rte_eal_wait_lcore(lcore_id) < 0) 1209 return -1; 1210 } 1211 1212 /* clean up the EAL */ 1213 rte_eal_cleanup(); 1214 1215 return 0; 1216 } 1217