/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <sys/queue.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
#include <inttypes.h>
#include <getopt.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>

#define MAX_QUEUES 128
/*
 * For 10 GbE, 128 queues require roughly
 * 128*512 (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
 */
#define NUM_MBUFS_PER_PORT (128*512)
#define MBUF_CACHE_SIZE 64
#define MBUF_DATA_SIZE (2048 + RTE_PKTMBUF_HEADROOM)
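/*
 * Rough, illustrative sizing (editor's note, not from the original source):
 * with the defaults above, each port reserves NUM_MBUFS_PER_PORT =
 * 128 * 512 = 65536 mbufs. Assuming the usual 128-byte RTE_PKTMBUF_HEADROOM,
 * each mbuf carries 2176 bytes of data room, so the shared mempool needs
 * roughly 140 MB of hugepage memory per enabled port, before per-mbuf
 * metadata is counted.
 */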
#define MAX_PKT_BURST 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

#define INVALID_PORT_ID 0xFF

/* mask of enabled ports */
static uint32_t enabled_port_mask;

/* number of pools (if the user does not specify any, 8 by default) */
static uint32_t num_queues = 8;
static uint32_t num_pools = 8;

/* empty vmdq configuration structure. Filled in programmatically */
static const struct rte_eth_conf vmdq_conf_default = {
	.rxmode = {
		.mq_mode = ETH_MQ_RX_VMDQ_ONLY,
		.split_hdr_size = 0,
		.header_split = 0, /**< Header Split disabled */
		.hw_ip_checksum = 0, /**< IP checksum offload disabled */
		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
		.jumbo_frame = 0, /**< Jumbo Frame Support disabled */
	},

	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
	.rx_adv_conf = {
		/*
		 * should be overridden separately in code with
		 * appropriate values
		 */
		.vmdq_rx_conf = {
			.nb_queue_pools = ETH_8_POOLS,
			.enable_default_pool = 0,
			.default_pool = 0,
			.nb_pool_maps = 0,
			.pool_map = {{0, 0},},
		},
	},
};

static unsigned lcore_ids[RTE_MAX_LCORE];
static uint8_t ports[RTE_MAX_ETHPORTS];
static unsigned num_ports; /**< The number of ports specified in command line */

/* array used for printing out statistics */
volatile unsigned long rxPackets[MAX_QUEUES] = {0};

const uint16_t vlan_tags[] = {
	0,  1,  2,  3,  4,  5,  6,  7,
	8,  9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55,
	56, 57, 58, 59, 60, 61, 62, 63,
};
const uint16_t num_vlans = RTE_DIM(vlan_tags);
static uint16_t num_pf_queues, num_vmdq_queues;
static uint16_t vmdq_pool_base, vmdq_queue_base;
/* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
static struct ether_addr pool_addr_template = {
	.addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
};

/* ethernet addresses of ports */
static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];

#define MAX_QUEUE_NUM_10G 128
#define MAX_QUEUE_NUM_1G 8
#define MAX_POOL_MAP_NUM_10G 64
#define MAX_POOL_MAP_NUM_1G 32
#define MAX_POOL_NUM_10G 64
#define MAX_POOL_NUM_1G 8
/*
 * Builds up the correct configuration for vmdq based on the vlan tags array
 * given above, and determines the queue number and pool map number according
 * to the valid pool number.
 */
static inline int
get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
{
	struct rte_eth_vmdq_rx_conf conf;
	unsigned i;

	conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
	conf.nb_pool_maps = num_pools;
	conf.enable_default_pool = 0;
	conf.default_pool = 0; /* set explicit value, even if not used */

	for (i = 0; i < conf.nb_pool_maps; i++) {
		conf.pool_map[i].vlan_id = vlan_tags[i];
		conf.pool_map[i].pools = (1UL << (i % num_pools));
	}

	(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
	(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
		sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
	return 0;
}
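/*
 * Illustrative mapping (editor's note, not part of the original source): if
 * get_eth_conf() were called with num_pools = 8, the loop above would program
 * nb_pool_maps = 8 entries pairing vlan_tags[0..7] = 0..7 with pool bitmasks
 * 1 << 0 .. 1 << 7, i.e. VLAN 0 is steered to pool 0, VLAN 1 to pool 1, and
 * so on. For larger pool counts the i % num_pools term simply wraps the pool
 * index around the vlan_tags array.
 */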
/*
 * Initialises a given port using global settings and with the rx buffers
 * coming from the mbuf_pool passed as parameter
 */
static inline int
port_init(uint8_t port, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_rxconf *rxconf;
	struct rte_eth_conf port_conf;
	uint16_t rxRings, txRings;
	const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
	const uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
	int retval;
	uint16_t q;
	uint16_t queues_per_pool;
	uint32_t max_nb_pools;

	/*
	 * The max pool number from dev_info will be used to validate the pool
	 * number specified in the command line.
	 */
	rte_eth_dev_info_get(port, &dev_info);
	max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
	/*
	 * We allow processing only part of the VMDQ pools, as specified by
	 * num_pools in the command line.
	 */
	if (num_pools > max_nb_pools) {
		printf("num_pools %d > max_nb_pools %d\n",
			num_pools, max_nb_pools);
		return -1;
	}
	retval = get_eth_conf(&port_conf, max_nb_pools);
	if (retval < 0)
		return retval;

	/*
	 * NIC queues are divided into pf queues and vmdq queues.
	 */
	/* There is an assumption here that all ports have the same configuration! */
	num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
	queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
	num_vmdq_queues = num_pools * queues_per_pool;
	num_queues = num_pf_queues + num_vmdq_queues;
	vmdq_queue_base = dev_info.vmdq_queue_base;
	vmdq_pool_base = dev_info.vmdq_pool_base;

	printf("pf queue num: %u, configured vmdq pool num: %u,"
		" each vmdq pool has %u queues\n",
		num_pf_queues, num_pools, queues_per_pool);
	printf("vmdq queue base: %d pool base %d\n",
		vmdq_queue_base, vmdq_pool_base);
	if (port >= rte_eth_dev_count())
		return -1;

	/*
	 * Though in this example we only receive packets from the first queue
	 * of each pool and send packets through the first rte_lcore_count() tx
	 * queues of the vmdq queues, all queues including pf queues are set up.
	 * This is because VMDQ queues don't always start from zero, and the
	 * PMD layer doesn't support selectively initialising part of the rx/tx
	 * queues.
	 */
	rxRings = (uint16_t)dev_info.max_rx_queues;
	txRings = (uint16_t)dev_info.max_tx_queues;
	retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
	if (retval != 0)
		return retval;

	rte_eth_dev_info_get(port, &dev_info);
	rxconf = &dev_info.default_rxconf;
	rxconf->rx_drop_en = 1;
	for (q = 0; q < rxRings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
				rte_eth_dev_socket_id(port),
				rxconf,
				mbuf_pool);
		if (retval < 0) {
			printf("initialise rx queue %d failed\n", q);
			return retval;
		}
	}

	for (q = 0; q < txRings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, txRingSize,
				rte_eth_dev_socket_id(port),
				NULL);
		if (retval < 0) {
			printf("initialise tx queue %d failed\n", q);
			return retval;
		}
	}

	retval = rte_eth_dev_start(port);
	if (retval < 0) {
		printf("port %d start failed\n", port);
		return retval;
	}

	rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
			(unsigned)port,
			vmdq_ports_eth_addr[port].addr_bytes[0],
			vmdq_ports_eth_addr[port].addr_bytes[1],
			vmdq_ports_eth_addr[port].addr_bytes[2],
			vmdq_ports_eth_addr[port].addr_bytes[3],
			vmdq_ports_eth_addr[port].addr_bytes[4],
			vmdq_ports_eth_addr[port].addr_bytes[5]);

	/*
	 * Set a mac for each pool.
	 * There is no default mac for the pools in i40e.
	 * Remove this after i40e fixes this issue.
	 */
	for (q = 0; q < num_pools; q++) {
		struct ether_addr mac;
		mac = pool_addr_template;
		mac.addr_bytes[4] = port;
		mac.addr_bytes[5] = q;
		printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
			port, q,
			mac.addr_bytes[0], mac.addr_bytes[1],
			mac.addr_bytes[2], mac.addr_bytes[3],
			mac.addr_bytes[4], mac.addr_bytes[5]);
		retval = rte_eth_dev_mac_addr_add(port, &mac,
				q + vmdq_pool_base);
		if (retval) {
			printf("mac addr add failed at pool %d\n", q);
			return retval;
		}
	}

	return 0;
}
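/*
 * Worked example (editor's note; the device figures are assumed for
 * illustration, not taken from the code): a 10 GbE NIC that reports
 * max_rx_queues = 128, vmdq_queue_num = 128 and max_vmdq_pools = 64 (compare
 * MAX_QUEUE_NUM_10G / MAX_POOL_NUM_10G above) gives num_pf_queues = 0 and
 * queues_per_pool = 2, so the default num_pools = 8 yields
 * num_vmdq_queues = 16. A device that reserves some queues for the PF
 * reports a smaller vmdq_queue_num, and the difference shows up as
 * num_pf_queues.
 */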
/* Check the num_pools parameter and set it if OK */
static int
vmdq_parse_num_pools(const char *q_arg)
{
	char *end = NULL;
	int n;

	/* parse number string */
	n = strtol(q_arg, &end, 10);
	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (n > num_vlans) {
		printf("num_pools %d > num_vlans %d\n", n, num_vlans);
		return -1;
	}

	num_pools = n;

	return 0;
}


static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}

/* Display usage */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n"
	       "  --nb-pools NP: number of pools\n",
	       prgname);
}

/* Parse the arguments (portmask, num_pools) given in the command line of the application */
static int
vmdq_parse_args(int argc, char **argv)
{
	int opt;
	int option_index;
	unsigned i;
	const char *prgname = argv[0];
	static struct option long_option[] = {
		{"nb-pools", required_argument, NULL, 0},
		{NULL, 0, 0, 0}
	};

	/* Parse command line */
	while ((opt = getopt_long(argc, argv, "p:", long_option,
			&option_index)) != EOF) {
		switch (opt) {
		/* portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				printf("invalid portmask\n");
				vmdq_usage(prgname);
				return -1;
			}
			break;
		case 0:
			if (vmdq_parse_num_pools(optarg) == -1) {
				printf("invalid number of pools\n");
				vmdq_usage(prgname);
				return -1;
			}
			break;

		default:
			vmdq_usage(prgname);
			return -1;
		}
	}

	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (enabled_port_mask & (1 << i))
			ports[num_ports++] = (uint8_t)i;
	}

	if (num_ports < 2 || num_ports % 2) {
		printf("Current enabled port number is %u, "
			"but it should be even and at least 2\n", num_ports);
		return -1;
	}

	return 0;
}

static void
update_mac_address(struct rte_mbuf *m, unsigned dst_port)
{
	struct ether_hdr *eth;
	void *tmp;

	eth = rte_pktmbuf_mtod(m, struct ether_hdr *);

	/* dst addr: 02:00:00:00:00:xx, where xx is the destination port */
	tmp = &eth->d_addr.addr_bytes[0];
	*((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);

	/* src addr */
	ether_addr_copy(&vmdq_ports_eth_addr[dst_port], &eth->s_addr);
}
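/*
 * Illustration (editor's note, not in the original source): the 8-byte store
 * in update_mac_address() relies on a little-endian host, so for
 * dst_port = 3 the destination MAC becomes 02:00:00:00:00:03. The store also
 * touches the first two bytes of the source MAC field, which are then
 * rewritten by the ether_addr_copy() call that sets the source address.
 */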
/* When we receive a HUP signal, print out our stats */
static void
sighup_handler(int signum)
{
	unsigned q;
	for (q = 0; q < num_queues; q++) {
		if (q % (num_queues/num_pools) == 0)
			printf("\nPool %u: ", q/(num_queues/num_pools));
		printf("%lu ", rxPackets[q]);
	}
	printf("\nFinished handling signal %d\n", signum);
}

/*
 * Main thread that does the work, reading from INPUT_PORT
 * and writing to OUTPUT_PORT
 */
static int
lcore_main(__attribute__((__unused__)) void *dummy)
{
	const uint16_t lcore_id = (uint16_t)rte_lcore_id();
	const uint16_t num_cores = (uint16_t)rte_lcore_count();
	uint16_t core_id = 0;
	uint16_t startQueue, endQueue;
	uint16_t q, i, p;
	const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);

	for (i = 0; i < num_cores; i++)
		if (lcore_ids[i] == lcore_id) {
			core_id = i;
			break;
		}

	/*
	 * Split the vmdq queues evenly across the cores; the first
	 * 'remainder' cores each take one extra queue.
	 */
	if (remainder != 0) {
		if (core_id < remainder) {
			startQueue = (uint16_t)(core_id *
					(num_vmdq_queues / num_cores + 1));
			endQueue = (uint16_t)(startQueue +
					(num_vmdq_queues / num_cores) + 1);
		} else {
			startQueue = (uint16_t)(core_id *
					(num_vmdq_queues / num_cores) +
					remainder);
			endQueue = (uint16_t)(startQueue +
					(num_vmdq_queues / num_cores));
		}
	} else {
		startQueue = (uint16_t)(core_id *
				(num_vmdq_queues / num_cores));
		endQueue = (uint16_t)(startQueue +
				(num_vmdq_queues / num_cores));
	}

	/* vmdq queue idx doesn't always start from zero. */
	startQueue += vmdq_queue_base;
	endQueue += vmdq_queue_base;
	printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
		(unsigned)lcore_id, startQueue, endQueue - 1);

	if (startQueue == endQueue) {
		printf("lcore %u has nothing to do\n", lcore_id);
		return 0;
	}

	for (;;) {
		struct rte_mbuf *buf[MAX_PKT_BURST];
		const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);

		for (p = 0; p < num_ports; p++) {
			const uint8_t sport = ports[p];
			/* 0 <-> 1, 2 <-> 3 etc */
			const uint8_t dport = ports[p ^ 1];
			if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
				continue;

			for (q = startQueue; q < endQueue; q++) {
				const uint16_t rxCount = rte_eth_rx_burst(sport,
					q, buf, buf_size);

				if (unlikely(rxCount == 0))
					continue;

				rxPackets[q] += rxCount;

				for (i = 0; i < rxCount; i++)
					update_mac_address(buf[i], dport);

				const uint16_t txCount = rte_eth_tx_burst(dport,
					vmdq_queue_base + core_id,
					buf,
					rxCount);

				/* free any mbufs the TX burst could not send */
				if (txCount != rxCount) {
					for (i = txCount; i < rxCount; i++)
						rte_pktmbuf_free(buf[i]);
				}
			}
		}
	}
}
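/*
 * Worked example (editor's note, illustrative numbers only): with
 * num_vmdq_queues = 16 and num_cores = 3, each core gets 16 / 3 = 5 queues
 * and remainder = 1, so core 0 reads queues 0-5 (six queues) while cores 1
 * and 2 read queues 6-10 and 11-15 respectively, all offset by
 * vmdq_queue_base at runtime.
 */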
/*
 * Update the global variable num_ports and the array ports[] according to the
 * number of ports in the system, and return the number of valid ports.
 */
static unsigned check_ports_num(unsigned nb_ports)
{
	unsigned valid_num_ports = num_ports;
	unsigned portid;

	if (num_ports > nb_ports) {
		printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
			num_ports, nb_ports);
		num_ports = nb_ports;
	}

	for (portid = 0; portid < num_ports; portid++) {
		if (ports[portid] >= nb_ports) {
			printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
				ports[portid], (nb_ports - 1));
			ports[portid] = INVALID_PORT_ID;
			valid_num_ports--;
		}
	}
	return valid_num_ports;
}

/* Main function, does initialisation and calls the per-lcore functions */
int
main(int argc, char *argv[])
{
	struct rte_mempool *mbuf_pool;
	unsigned lcore_id, core_id = 0;
	int ret;
	unsigned nb_ports, valid_num_ports;
	uint8_t portid;

	signal(SIGHUP, sighup_handler);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
	argc -= ret;
	argv += ret;

	/* parse app arguments */
	ret = vmdq_parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		if (rte_lcore_is_enabled(lcore_id))
			lcore_ids[core_id++] = lcore_id;

	if (rte_lcore_count() > RTE_MAX_LCORE)
		rte_exit(EXIT_FAILURE, "Not enough cores\n");

	nb_ports = rte_eth_dev_count();
	if (nb_ports > RTE_MAX_ETHPORTS)
		nb_ports = RTE_MAX_ETHPORTS;

	/*
	 * Update the global num_ports and the global array ports[],
	 * and get the number of valid ports according to the system port count.
	 */
	valid_num_ports = check_ports_num(nb_ports);

	if (valid_num_ports < 2 || valid_num_ports % 2) {
		printf("Current valid ports number is %u\n", valid_num_ports);
		rte_exit(EXIT_FAILURE,
			"Error: the number of valid ports must be even and at least 2\n");
	}

	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
		NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
		0, MBUF_DATA_SIZE, rte_socket_id());
	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

	/* initialize all ports */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}
		if (port_init(portid, mbuf_pool) != 0)
			rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
	}

	/* call lcore_main() on every lcore */
	rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}
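/*
 * Example invocation (editor's note, illustrative only; the binary name and
 * the EAL core/memory options depend on how the example was built and on the
 * target machine):
 *
 *   ./build/vmdq_app -c 0xf -n 4 -- -p 0x3 --nb-pools 8
 *
 * This runs the forwarding loop on four lcores over two ports (port mask 0x3)
 * with 8 VMDQ pools. Sending SIGHUP to the process prints the per-pool
 * receive counters accumulated in rxPackets[].
 */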