1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <sys/queue.h> 7 #include <stdlib.h> 8 #include <string.h> 9 #include <stdio.h> 10 #include <assert.h> 11 #include <errno.h> 12 #include <signal.h> 13 #include <stdarg.h> 14 #include <inttypes.h> 15 #include <getopt.h> 16 17 #include <rte_common.h> 18 #include <rte_log.h> 19 #include <rte_memory.h> 20 #include <rte_memcpy.h> 21 #include <rte_eal.h> 22 #include <rte_launch.h> 23 #include <rte_cycles.h> 24 #include <rte_prefetch.h> 25 #include <rte_lcore.h> 26 #include <rte_per_lcore.h> 27 #include <rte_branch_prediction.h> 28 #include <rte_interrupts.h> 29 #include <rte_random.h> 30 #include <rte_debug.h> 31 #include <rte_ether.h> 32 #include <rte_ethdev.h> 33 #include <rte_mempool.h> 34 #include <rte_mbuf.h> 35 36 #define MAX_QUEUES 1024 37 /* 38 * 1024 queues require to meet the needs of a large number of vmdq_pools. 39 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port. 40 */ 41 #define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RX_DESC_DEFAULT, \ 42 TX_DESC_DEFAULT)) 43 #define MBUF_CACHE_SIZE 64 44 45 #define MAX_PKT_BURST 32 46 47 /* 48 * Configurable number of RX/TX ring descriptors 49 */ 50 #define RX_DESC_DEFAULT 1024 51 #define TX_DESC_DEFAULT 1024 52 53 #define INVALID_PORT_ID 0xFF 54 55 /* mask of enabled ports */ 56 static uint32_t enabled_port_mask; 57 58 /* number of pools (if user does not specify any, 8 by default */ 59 static uint32_t num_queues = 8; 60 static uint32_t num_pools = 8; 61 static uint8_t rss_enable; 62 63 /* Default structure for VMDq. 8< */ 64 65 /* empty VMDq configuration structure. Filled in programmatically */ 66 static const struct rte_eth_conf vmdq_conf_default = { 67 .rxmode = { 68 .mq_mode = RTE_ETH_MQ_RX_VMDQ_ONLY, 69 }, 70 71 .txmode = { 72 .mq_mode = RTE_ETH_MQ_TX_NONE, 73 }, 74 .rx_adv_conf = { 75 /* 76 * should be overridden separately in code with 77 * appropriate values 78 */ 79 .vmdq_rx_conf = { 80 .nb_queue_pools = RTE_ETH_8_POOLS, 81 .enable_default_pool = 0, 82 .default_pool = 0, 83 .nb_pool_maps = 0, 84 .pool_map = {{0, 0},}, 85 }, 86 }, 87 }; 88 /* >8 End of Empty vdmq configuration structure. */ 89 90 static unsigned lcore_ids[RTE_MAX_LCORE]; 91 static uint16_t ports[RTE_MAX_ETHPORTS]; 92 static unsigned num_ports; /**< The number of ports specified in command line */ 93 94 /* array used for printing out statistics */ 95 volatile unsigned long rxPackets[MAX_QUEUES] = {0}; 96 97 /* vlan_tags 8< */ 98 const uint16_t vlan_tags[] = { 99 0, 1, 2, 3, 4, 5, 6, 7, 100 8, 9, 10, 11, 12, 13, 14, 15, 101 16, 17, 18, 19, 20, 21, 22, 23, 102 24, 25, 26, 27, 28, 29, 30, 31, 103 32, 33, 34, 35, 36, 37, 38, 39, 104 40, 41, 42, 43, 44, 45, 46, 47, 105 48, 49, 50, 51, 52, 53, 54, 55, 106 56, 57, 58, 59, 60, 61, 62, 63, 107 }; 108 /* >8 End of vlan_tags. */ 109 110 const uint16_t num_vlans = RTE_DIM(vlan_tags); 111 static uint16_t num_pf_queues, num_vmdq_queues; 112 static uint16_t vmdq_pool_base, vmdq_queue_base; 113 114 /* Pool mac address template. 8< */ 115 116 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */ 117 static struct rte_ether_addr pool_addr_template = { 118 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00} 119 }; 120 /* >8 End of mac addr template. */ 121 122 /* ethernet addresses of ports */ 123 static struct rte_ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS]; 124 125 #define MAX_QUEUE_NUM_10G 128 126 #define MAX_QUEUE_NUM_1G 8 127 #define MAX_POOL_MAP_NUM_10G 64 128 #define MAX_POOL_MAP_NUM_1G 32 129 #define MAX_POOL_NUM_10G 64 130 #define MAX_POOL_NUM_1G 8 131 /* 132 * Builds up the correct configuration for vmdq based on the vlan tags array 133 * given above, and determine the queue number and pool map number according to 134 * valid pool number 135 */ 136 137 /* Building correct configuration for vdmq. 8< */ 138 static inline int 139 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools) 140 { 141 struct rte_eth_vmdq_rx_conf conf; 142 unsigned i; 143 144 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools; 145 conf.nb_pool_maps = num_pools; 146 conf.enable_default_pool = 0; 147 conf.default_pool = 0; /* set explicit value, even if not used */ 148 149 for (i = 0; i < conf.nb_pool_maps; i++) { 150 conf.pool_map[i].vlan_id = vlan_tags[i]; 151 conf.pool_map[i].pools = (1UL << (i % num_pools)); 152 } 153 154 (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf))); 155 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf, 156 sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf))); 157 if (rss_enable) { 158 eth_conf->rxmode.mq_mode = RTE_ETH_MQ_RX_VMDQ_RSS; 159 eth_conf->rx_adv_conf.rss_conf.rss_hf = RTE_ETH_RSS_IP | 160 RTE_ETH_RSS_UDP | 161 RTE_ETH_RSS_TCP | 162 RTE_ETH_RSS_SCTP; 163 } 164 return 0; 165 } 166 167 /* 168 * Initialises a given port using global settings and with the rx buffers 169 * coming from the mbuf_pool passed as parameter 170 */ 171 static inline int 172 port_init(uint16_t port, struct rte_mempool *mbuf_pool) 173 { 174 struct rte_eth_dev_info dev_info; 175 struct rte_eth_rxconf *rxconf; 176 struct rte_eth_txconf *txconf; 177 struct rte_eth_conf port_conf; 178 uint16_t rxRings, txRings; 179 uint16_t rxRingSize = RX_DESC_DEFAULT; 180 uint16_t txRingSize = TX_DESC_DEFAULT; 181 int retval; 182 uint16_t q; 183 uint16_t queues_per_pool; 184 uint32_t max_nb_pools; 185 uint64_t rss_hf_tmp; 186 187 /* 188 * The max pool number from dev_info will be used to validate the pool 189 * number specified in cmd line 190 */ 191 retval = rte_eth_dev_info_get(port, &dev_info); 192 if (retval != 0) { 193 printf("Error during getting device (port %u) info: %s\n", 194 port, strerror(-retval)); 195 return retval; 196 } 197 198 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools; 199 /* 200 * We allow to process part of VMDQ pools specified by num_pools in 201 * command line. 202 */ 203 if (num_pools > max_nb_pools) { 204 printf("num_pools %d >max_nb_pools %d\n", 205 num_pools, max_nb_pools); 206 return -1; 207 } 208 retval = get_eth_conf(&port_conf, max_nb_pools); 209 if (retval < 0) 210 return retval; 211 212 /* 213 * NIC queues are divided into pf queues and vmdq queues. 214 */ 215 /* There is assumption here all ports have the same configuration! */ 216 num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num; 217 queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools; 218 num_vmdq_queues = num_pools * queues_per_pool; 219 num_queues = num_pf_queues + num_vmdq_queues; 220 vmdq_queue_base = dev_info.vmdq_queue_base; 221 vmdq_pool_base = dev_info.vmdq_pool_base; 222 223 printf("pf queue num: %u, configured vmdq pool num: %u," 224 " each vmdq pool has %u queues\n", 225 num_pf_queues, num_pools, queues_per_pool); 226 printf("vmdq queue base: %d pool base %d\n", 227 vmdq_queue_base, vmdq_pool_base); 228 if (!rte_eth_dev_is_valid_port(port)) 229 return -1; 230 231 rss_hf_tmp = port_conf.rx_adv_conf.rss_conf.rss_hf; 232 port_conf.rx_adv_conf.rss_conf.rss_hf &= 233 dev_info.flow_type_rss_offloads; 234 if (port_conf.rx_adv_conf.rss_conf.rss_hf != rss_hf_tmp) { 235 printf("Port %u modified RSS hash function based on hardware support," 236 "requested:%#"PRIx64" configured:%#"PRIx64"\n", 237 port, 238 rss_hf_tmp, 239 port_conf.rx_adv_conf.rss_conf.rss_hf); 240 } 241 242 /* 243 * Though in this example, we only receive packets from the first queue 244 * of each pool and send packets through first rte_lcore_count() tx 245 * queues of vmdq queues, all queues including pf queues are setup. 246 * This is because VMDQ queues doesn't always start from zero, and the 247 * PMD layer doesn't support selectively initialising part of rx/tx 248 * queues. 249 */ 250 rxRings = (uint16_t)dev_info.max_rx_queues; 251 txRings = (uint16_t)dev_info.max_tx_queues; 252 253 retval = rte_eth_dev_info_get(port, &dev_info); 254 if (retval != 0) { 255 printf("Error during getting device (port %u) info: %s\n", 256 port, strerror(-retval)); 257 return retval; 258 } 259 260 if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) 261 port_conf.txmode.offloads |= 262 RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 263 retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf); 264 if (retval != 0) 265 return retval; 266 267 retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize, 268 &txRingSize); 269 if (retval != 0) 270 return retval; 271 if (RTE_MAX(rxRingSize, txRingSize) > RTE_MAX(RX_DESC_DEFAULT, 272 TX_DESC_DEFAULT)) { 273 printf("Mbuf pool has an insufficient size for port %u.\n", 274 port); 275 return -1; 276 } 277 278 rxconf = &dev_info.default_rxconf; 279 rxconf->rx_drop_en = 1; 280 txconf = &dev_info.default_txconf; 281 txconf->offloads = port_conf.txmode.offloads; 282 for (q = 0; q < rxRings; q++) { 283 retval = rte_eth_rx_queue_setup(port, q, rxRingSize, 284 rte_eth_dev_socket_id(port), 285 rxconf, 286 mbuf_pool); 287 if (retval < 0) { 288 printf("initialise rx queue %d failed\n", q); 289 return retval; 290 } 291 } 292 293 for (q = 0; q < txRings; q++) { 294 retval = rte_eth_tx_queue_setup(port, q, txRingSize, 295 rte_eth_dev_socket_id(port), 296 txconf); 297 if (retval < 0) { 298 printf("initialise tx queue %d failed\n", q); 299 return retval; 300 } 301 } 302 303 retval = rte_eth_dev_start(port); 304 if (retval < 0) { 305 printf("port %d start failed\n", port); 306 return retval; 307 } 308 309 retval = rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]); 310 if (retval < 0) { 311 printf("port %d MAC address get failed: %s\n", port, 312 rte_strerror(-retval)); 313 return retval; 314 } 315 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 316 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", 317 (unsigned)port, 318 RTE_ETHER_ADDR_BYTES(&vmdq_ports_eth_addr[port])); 319 320 /* 321 * Set mac for each pool. 322 * There is no default mac for the pools in i40. 323 * Removes this after i40e fixes this issue. 324 */ 325 for (q = 0; q < num_pools; q++) { 326 struct rte_ether_addr mac; 327 mac = pool_addr_template; 328 mac.addr_bytes[4] = port; 329 mac.addr_bytes[5] = q; 330 printf("Port %u vmdq pool %u set mac " RTE_ETHER_ADDR_PRT_FMT "\n", 331 port, q, RTE_ETHER_ADDR_BYTES(&mac)); 332 retval = rte_eth_dev_mac_addr_add(port, &mac, 333 q + vmdq_pool_base); 334 if (retval) { 335 printf("mac addr add failed at pool %d\n", q); 336 return retval; 337 } 338 } 339 340 return 0; 341 } 342 /* >8 End of get_eth_conf. */ 343 344 /* Check num_pools parameter and set it if OK*/ 345 static int 346 vmdq_parse_num_pools(const char *q_arg) 347 { 348 char *end = NULL; 349 int n; 350 351 /* parse number string */ 352 n = strtol(q_arg, &end, 10); 353 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) 354 return -1; 355 356 if (num_pools > num_vlans) { 357 printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans); 358 return -1; 359 } 360 361 num_pools = n; 362 363 return 0; 364 } 365 366 367 static int 368 parse_portmask(const char *portmask) 369 { 370 char *end = NULL; 371 unsigned long pm; 372 373 /* parse hexadecimal string */ 374 pm = strtoul(portmask, &end, 16); 375 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 376 return 0; 377 378 return pm; 379 } 380 381 /* Display usage */ 382 static void 383 vmdq_usage(const char *prgname) 384 { 385 printf("%s [EAL options] -- -p PORTMASK]\n" 386 " --nb-pools NP: number of pools\n" 387 " --enable-rss: enable RSS (disabled by default)\n", 388 prgname); 389 } 390 391 /* Parse the argument (num_pools) given in the command line of the application */ 392 static int 393 vmdq_parse_args(int argc, char **argv) 394 { 395 int opt; 396 int option_index; 397 unsigned i; 398 const char *prgname = argv[0]; 399 static struct option long_option[] = { 400 {"nb-pools", required_argument, NULL, 0}, 401 {"enable-rss", 0, NULL, 0}, 402 {NULL, 0, 0, 0} 403 }; 404 405 /* Parse command line */ 406 while ((opt = getopt_long(argc, argv, "p:", long_option, 407 &option_index)) != EOF) { 408 switch (opt) { 409 /* portmask */ 410 case 'p': 411 enabled_port_mask = parse_portmask(optarg); 412 if (enabled_port_mask == 0) { 413 printf("invalid portmask\n"); 414 vmdq_usage(prgname); 415 return -1; 416 } 417 break; 418 case 0: 419 if (!strcmp(long_option[option_index].name, 420 "nb-pools")) { 421 if (vmdq_parse_num_pools(optarg) == -1) { 422 printf("invalid number of pools\n"); 423 vmdq_usage(prgname); 424 return -1; 425 } 426 } 427 428 if (!strcmp(long_option[option_index].name, 429 "enable-rss")) 430 rss_enable = 1; 431 break; 432 433 default: 434 vmdq_usage(prgname); 435 return -1; 436 } 437 } 438 439 for (i = 0; i < RTE_MAX_ETHPORTS; i++) { 440 if (enabled_port_mask & (1 << i)) 441 ports[num_ports++] = (uint8_t)i; 442 } 443 444 if (num_ports < 2 || num_ports % 2) { 445 printf("Current enabled port number is %u," 446 "but it should be even and at least 2\n", num_ports); 447 return -1; 448 } 449 450 return 0; 451 } 452 453 static void 454 update_mac_address(struct rte_mbuf *m, unsigned dst_port) 455 { 456 struct rte_ether_hdr *eth; 457 void *tmp; 458 459 eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 460 461 /* 02:00:00:00:00:xx */ 462 tmp = ð->dst_addr.addr_bytes[0]; 463 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40); 464 465 /* src addr */ 466 rte_ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->src_addr); 467 } 468 469 /* When we receive a HUP signal, print out our stats */ 470 static void 471 sighup_handler(int signum) 472 { 473 unsigned int q = vmdq_queue_base; 474 for (; q < num_queues; q++) { 475 if ((q - vmdq_queue_base) % (num_vmdq_queues / num_pools) == 0) 476 printf("\nPool %u: ", (q - vmdq_queue_base) / 477 (num_vmdq_queues / num_pools)); 478 printf("%lu ", rxPackets[q]); 479 } 480 printf("\nFinished handling signal %d\n", signum); 481 } 482 483 /* 484 * Main thread that does the work, reading from INPUT_PORT 485 * and writing to OUTPUT_PORT 486 */ 487 static int 488 lcore_main(__rte_unused void *dummy) 489 { 490 const uint16_t lcore_id = (uint16_t)rte_lcore_id(); 491 const uint16_t num_cores = (uint16_t)rte_lcore_count(); 492 uint16_t core_id = 0; 493 uint16_t startQueue, endQueue; 494 uint16_t q, i, p; 495 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores); 496 497 for (i = 0; i < num_cores; i++) 498 if (lcore_ids[i] == lcore_id) { 499 core_id = i; 500 break; 501 } 502 503 if (remainder != 0) { 504 if (core_id < remainder) { 505 startQueue = (uint16_t)(core_id * 506 (num_vmdq_queues / num_cores + 1)); 507 endQueue = (uint16_t)(startQueue + 508 (num_vmdq_queues / num_cores) + 1); 509 } else { 510 startQueue = (uint16_t)(core_id * 511 (num_vmdq_queues / num_cores) + 512 remainder); 513 endQueue = (uint16_t)(startQueue + 514 (num_vmdq_queues / num_cores)); 515 } 516 } else { 517 startQueue = (uint16_t)(core_id * 518 (num_vmdq_queues / num_cores)); 519 endQueue = (uint16_t)(startQueue + 520 (num_vmdq_queues / num_cores)); 521 } 522 523 /* vmdq queue idx doesn't always start from zero.*/ 524 startQueue += vmdq_queue_base; 525 endQueue += vmdq_queue_base; 526 printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id, 527 (unsigned)lcore_id, startQueue, endQueue - 1); 528 529 if (startQueue == endQueue) { 530 printf("lcore %u has nothing to do\n", lcore_id); 531 return 0; 532 } 533 534 for (;;) { 535 struct rte_mbuf *buf[MAX_PKT_BURST]; 536 const uint16_t buf_size = RTE_DIM(buf); 537 538 for (p = 0; p < num_ports; p++) { 539 const uint8_t sport = ports[p]; 540 /* 0 <-> 1, 2 <-> 3 etc */ 541 const uint8_t dport = ports[p ^ 1]; 542 if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID)) 543 continue; 544 545 for (q = startQueue; q < endQueue; q++) { 546 const uint16_t rxCount = rte_eth_rx_burst(sport, 547 q, buf, buf_size); 548 549 if (unlikely(rxCount == 0)) 550 continue; 551 552 rxPackets[q] += rxCount; 553 554 for (i = 0; i < rxCount; i++) 555 update_mac_address(buf[i], dport); 556 557 const uint16_t txCount = rte_eth_tx_burst(dport, 558 vmdq_queue_base + core_id, 559 buf, 560 rxCount); 561 562 if (txCount != rxCount) { 563 for (i = txCount; i < rxCount; i++) 564 rte_pktmbuf_free(buf[i]); 565 } 566 } 567 } 568 } 569 } 570 571 /* 572 * Update the global var NUM_PORTS and array PORTS according to system ports number 573 * and return valid ports number 574 */ 575 static unsigned check_ports_num(unsigned nb_ports) 576 { 577 unsigned valid_num_ports = num_ports; 578 unsigned portid; 579 580 if (num_ports > nb_ports) { 581 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n", 582 num_ports, nb_ports); 583 num_ports = nb_ports; 584 } 585 586 for (portid = 0; portid < num_ports; portid++) { 587 if (!rte_eth_dev_is_valid_port(ports[portid])) { 588 printf("\nSpecified port ID(%u) is not valid\n", 589 ports[portid]); 590 ports[portid] = INVALID_PORT_ID; 591 valid_num_ports--; 592 } 593 } 594 return valid_num_ports; 595 } 596 597 /* Main function, does initialisation and calls the per-lcore functions */ 598 int 599 main(int argc, char *argv[]) 600 { 601 struct rte_mempool *mbuf_pool; 602 unsigned lcore_id, core_id = 0; 603 int ret; 604 unsigned nb_ports, valid_num_ports; 605 uint16_t portid; 606 607 signal(SIGHUP, sighup_handler); 608 609 /* init EAL */ 610 ret = rte_eal_init(argc, argv); 611 if (ret < 0) 612 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); 613 argc -= ret; 614 argv += ret; 615 616 /* parse app arguments */ 617 ret = vmdq_parse_args(argc, argv); 618 if (ret < 0) 619 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n"); 620 621 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) 622 if (rte_lcore_is_enabled(lcore_id)) 623 lcore_ids[core_id++] = lcore_id; 624 625 if (rte_lcore_count() > RTE_MAX_LCORE) 626 rte_exit(EXIT_FAILURE, "Not enough cores\n"); 627 628 nb_ports = rte_eth_dev_count_avail(); 629 630 /* 631 * Update the global var NUM_PORTS and global array PORTS 632 * and get value of var VALID_NUM_PORTS according to system ports number 633 */ 634 valid_num_ports = check_ports_num(nb_ports); 635 636 if (valid_num_ports < 2 || valid_num_ports % 2) { 637 printf("Current valid ports number is %u\n", valid_num_ports); 638 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n"); 639 } 640 641 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", 642 NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE, 643 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); 644 if (mbuf_pool == NULL) 645 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); 646 647 /* initialize all ports */ 648 RTE_ETH_FOREACH_DEV(portid) { 649 /* skip ports that are not enabled */ 650 if ((enabled_port_mask & (1 << portid)) == 0) { 651 printf("\nSkipping disabled port %d\n", portid); 652 continue; 653 } 654 if (port_init(portid, mbuf_pool) != 0) 655 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n"); 656 } 657 658 /* call lcore_main() on every lcore */ 659 rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MAIN); 660 RTE_LCORE_FOREACH_WORKER(lcore_id) { 661 if (rte_eal_wait_lcore(lcore_id) < 0) 662 return -1; 663 } 664 665 /* clean up the EAL */ 666 rte_eal_cleanup(); 667 668 return 0; 669 } 670