1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <stdint.h>
#include <sys/queue.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
#include <inttypes.h>
#include <getopt.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_log.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_memcpy.h>

#define MAX_QUEUES 1024
/*
 * 1024 queues require to meet the needs of a large number of vmdq_pools.
 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
 */
#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
						RTE_TEST_TX_DESC_DEFAULT))
#define MBUF_CACHE_SIZE 64

/* Max packets fetched/sent per rte_eth_rx/tx_burst() call. */
#define MAX_PKT_BURST 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

/* Sentinel stored in ports[] for entries rejected by check_ports_num(). */
#define INVALID_PORT_ID 0xFF

/* mask of enabled ports */
static uint32_t enabled_port_mask;

/* number of queues/pools (if user does not specify any, 8 by default) */
static uint32_t num_queues = 8;
static uint32_t num_pools = 8;

/* Empty vmdq configuration structure. Filled in programmatically. */
static const struct rte_eth_conf vmdq_conf_default = {
	.rxmode = {
		.mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
		.split_hdr_size = 0,
		.header_split   = 0, /**< Header Split disabled */
		.hw_ip_checksum = 0, /**< IP checksum offload disabled */
		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
	},

	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
	.rx_adv_conf = {
		/*
		 * should be overridden separately in code with
		 * appropriate values
		 */
		.vmdq_rx_conf = {
			.nb_queue_pools = ETH_8_POOLS,
			.enable_default_pool = 0,
			.default_pool = 0,
			.nb_pool_maps = 0,
			.pool_map = {{0, 0},},
		},
	},
};

/* Enabled lcore ids, filled in main() from the EAL lcore set. */
static unsigned lcore_ids[RTE_MAX_LCORE];
static uint8_t ports[RTE_MAX_ETHPORTS];
static unsigned num_ports; /**< The number of ports specified in command line */

/* array used for printing out statistics (indexed by rx queue id) */
volatile unsigned long rxPackets[MAX_QUEUES] = {0};

/* VLAN ids mapped onto VMDQ pools by get_eth_conf(). */
const uint16_t vlan_tags[] = {
	0,  1,  2,  3,  4,  5,  6,  7,
	8,  9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55,
	56, 57, 58, 59, 60, 61, 62, 63,
};
const uint16_t num_vlans = RTE_DIM(vlan_tags);
/* Derived in port_init() from rte_eth_dev_info; assumed equal on all ports. */
static uint16_t num_pf_queues, num_vmdq_queues;
static uint16_t vmdq_pool_base, vmdq_queue_base;
/* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
static struct ether_addr pool_addr_template = {
	.addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
};

/* ethernet addresses of ports */
static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];

/* Per-NIC-class limits (informational; runtime limits come from dev_info). */
#define MAX_QUEUE_NUM_10G 128
#define MAX_QUEUE_NUM_1G 8
#define MAX_POOL_MAP_NUM_10G 64
#define MAX_POOL_MAP_NUM_1G 32
#define MAX_POOL_NUM_10G 64
#define MAX_POOL_NUM_1G 8 160 /* 161 * Builds up the correct configuration for vmdq based on the vlan tags array 162 * given above, and determine the queue number and pool map number according to 163 * valid pool number 164 */ 165 static inline int 166 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools) 167 { 168 struct rte_eth_vmdq_rx_conf conf; 169 unsigned i; 170 171 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools; 172 conf.nb_pool_maps = num_pools; 173 conf.enable_default_pool = 0; 174 conf.default_pool = 0; /* set explicit value, even if not used */ 175 176 for (i = 0; i < conf.nb_pool_maps; i++) { 177 conf.pool_map[i].vlan_id = vlan_tags[i]; 178 conf.pool_map[i].pools = (1UL << (i % num_pools)); 179 } 180 181 (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf))); 182 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf, 183 sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf))); 184 return 0; 185 } 186 187 /* 188 * Initialises a given port using global settings and with the rx buffers 189 * coming from the mbuf_pool passed as parameter 190 */ 191 static inline int 192 port_init(uint8_t port, struct rte_mempool *mbuf_pool) 193 { 194 struct rte_eth_dev_info dev_info; 195 struct rte_eth_rxconf *rxconf; 196 struct rte_eth_conf port_conf; 197 uint16_t rxRings, txRings; 198 const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT, txRingSize = RTE_TEST_TX_DESC_DEFAULT; 199 int retval; 200 uint16_t q; 201 uint16_t queues_per_pool; 202 uint32_t max_nb_pools; 203 204 /* 205 * The max pool number from dev_info will be used to validate the pool 206 * number specified in cmd line 207 */ 208 rte_eth_dev_info_get(port, &dev_info); 209 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools; 210 /* 211 * We allow to process part of VMDQ pools specified by num_pools in 212 * command line. 
213 */ 214 if (num_pools > max_nb_pools) { 215 printf("num_pools %d >max_nb_pools %d\n", 216 num_pools, max_nb_pools); 217 return -1; 218 } 219 retval = get_eth_conf(&port_conf, max_nb_pools); 220 if (retval < 0) 221 return retval; 222 223 /* 224 * NIC queues are divided into pf queues and vmdq queues. 225 */ 226 /* There is assumption here all ports have the same configuration! */ 227 num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num; 228 queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools; 229 num_vmdq_queues = num_pools * queues_per_pool; 230 num_queues = num_pf_queues + num_vmdq_queues; 231 vmdq_queue_base = dev_info.vmdq_queue_base; 232 vmdq_pool_base = dev_info.vmdq_pool_base; 233 234 printf("pf queue num: %u, configured vmdq pool num: %u," 235 " each vmdq pool has %u queues\n", 236 num_pf_queues, num_pools, queues_per_pool); 237 printf("vmdq queue base: %d pool base %d\n", 238 vmdq_queue_base, vmdq_pool_base); 239 if (port >= rte_eth_dev_count()) 240 return -1; 241 242 /* 243 * Though in this example, we only receive packets from the first queue 244 * of each pool and send packets through first rte_lcore_count() tx 245 * queues of vmdq queues, all queues including pf queues are setup. 246 * This is because VMDQ queues doesn't always start from zero, and the 247 * PMD layer doesn't support selectively initialising part of rx/tx 248 * queues. 
249 */ 250 rxRings = (uint16_t)dev_info.max_rx_queues; 251 txRings = (uint16_t)dev_info.max_tx_queues; 252 retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf); 253 if (retval != 0) 254 return retval; 255 256 rte_eth_dev_info_get(port, &dev_info); 257 rxconf = &dev_info.default_rxconf; 258 rxconf->rx_drop_en = 1; 259 for (q = 0; q < rxRings; q++) { 260 retval = rte_eth_rx_queue_setup(port, q, rxRingSize, 261 rte_eth_dev_socket_id(port), 262 rxconf, 263 mbuf_pool); 264 if (retval < 0) { 265 printf("initialise rx queue %d failed\n", q); 266 return retval; 267 } 268 } 269 270 for (q = 0; q < txRings; q++) { 271 retval = rte_eth_tx_queue_setup(port, q, txRingSize, 272 rte_eth_dev_socket_id(port), 273 NULL); 274 if (retval < 0) { 275 printf("initialise tx queue %d failed\n", q); 276 return retval; 277 } 278 } 279 280 retval = rte_eth_dev_start(port); 281 if (retval < 0) { 282 printf("port %d start failed\n", port); 283 return retval; 284 } 285 286 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]); 287 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 288 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", 289 (unsigned)port, 290 vmdq_ports_eth_addr[port].addr_bytes[0], 291 vmdq_ports_eth_addr[port].addr_bytes[1], 292 vmdq_ports_eth_addr[port].addr_bytes[2], 293 vmdq_ports_eth_addr[port].addr_bytes[3], 294 vmdq_ports_eth_addr[port].addr_bytes[4], 295 vmdq_ports_eth_addr[port].addr_bytes[5]); 296 297 /* 298 * Set mac for each pool. 299 * There is no default mac for the pools in i40. 300 * Removes this after i40e fixes this issue. 
301 */ 302 for (q = 0; q < num_pools; q++) { 303 struct ether_addr mac; 304 mac = pool_addr_template; 305 mac.addr_bytes[4] = port; 306 mac.addr_bytes[5] = q; 307 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n", 308 port, q, 309 mac.addr_bytes[0], mac.addr_bytes[1], 310 mac.addr_bytes[2], mac.addr_bytes[3], 311 mac.addr_bytes[4], mac.addr_bytes[5]); 312 retval = rte_eth_dev_mac_addr_add(port, &mac, 313 q + vmdq_pool_base); 314 if (retval) { 315 printf("mac addr add failed at pool %d\n", q); 316 return retval; 317 } 318 } 319 320 return 0; 321 } 322 323 /* Check num_pools parameter and set it if OK*/ 324 static int 325 vmdq_parse_num_pools(const char *q_arg) 326 { 327 char *end = NULL; 328 int n; 329 330 /* parse number string */ 331 n = strtol(q_arg, &end, 10); 332 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) 333 return -1; 334 335 if (num_pools > num_vlans) { 336 printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans); 337 return -1; 338 } 339 340 num_pools = n; 341 342 return 0; 343 } 344 345 346 static int 347 parse_portmask(const char *portmask) 348 { 349 char *end = NULL; 350 unsigned long pm; 351 352 /* parse hexadecimal string */ 353 pm = strtoul(portmask, &end, 16); 354 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 355 return -1; 356 357 if (pm == 0) 358 return -1; 359 360 return pm; 361 } 362 363 /* Display usage */ 364 static void 365 vmdq_usage(const char *prgname) 366 { 367 printf("%s [EAL options] -- -p PORTMASK]\n" 368 " --nb-pools NP: number of pools\n", 369 prgname); 370 } 371 372 /* Parse the argument (num_pools) given in the command line of the application */ 373 static int 374 vmdq_parse_args(int argc, char **argv) 375 { 376 int opt; 377 int option_index; 378 unsigned i; 379 const char *prgname = argv[0]; 380 static struct option long_option[] = { 381 {"nb-pools", required_argument, NULL, 0}, 382 {NULL, 0, 0, 0} 383 }; 384 385 /* Parse command line */ 386 while ((opt = getopt_long(argc, 
argv, "p:", long_option, 387 &option_index)) != EOF) { 388 switch (opt) { 389 /* portmask */ 390 case 'p': 391 enabled_port_mask = parse_portmask(optarg); 392 if (enabled_port_mask == 0) { 393 printf("invalid portmask\n"); 394 vmdq_usage(prgname); 395 return -1; 396 } 397 break; 398 case 0: 399 if (vmdq_parse_num_pools(optarg) == -1) { 400 printf("invalid number of pools\n"); 401 vmdq_usage(prgname); 402 return -1; 403 } 404 break; 405 406 default: 407 vmdq_usage(prgname); 408 return -1; 409 } 410 } 411 412 for (i = 0; i < RTE_MAX_ETHPORTS; i++) { 413 if (enabled_port_mask & (1 << i)) 414 ports[num_ports++] = (uint8_t)i; 415 } 416 417 if (num_ports < 2 || num_ports % 2) { 418 printf("Current enabled port number is %u," 419 "but it should be even and at least 2\n", num_ports); 420 return -1; 421 } 422 423 return 0; 424 } 425 426 static void 427 update_mac_address(struct rte_mbuf *m, unsigned dst_port) 428 { 429 struct ether_hdr *eth; 430 void *tmp; 431 432 eth = rte_pktmbuf_mtod(m, struct ether_hdr *); 433 434 /* 02:00:00:00:00:xx */ 435 tmp = ð->d_addr.addr_bytes[0]; 436 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40); 437 438 /* src addr */ 439 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr); 440 } 441 442 /* When we receive a HUP signal, print out our stats */ 443 static void 444 sighup_handler(int signum) 445 { 446 unsigned q; 447 for (q = 0; q < num_queues; q++) { 448 if (q % (num_queues/num_pools) == 0) 449 printf("\nPool %u: ", q/(num_queues/num_pools)); 450 printf("%lu ", rxPackets[q]); 451 } 452 printf("\nFinished handling signal %d\n", signum); 453 } 454 455 /* 456 * Main thread that does the work, reading from INPUT_PORT 457 * and writing to OUTPUT_PORT 458 */ 459 static int 460 lcore_main(__attribute__((__unused__)) void *dummy) 461 { 462 const uint16_t lcore_id = (uint16_t)rte_lcore_id(); 463 const uint16_t num_cores = (uint16_t)rte_lcore_count(); 464 uint16_t core_id = 0; 465 uint16_t startQueue, endQueue; 466 uint16_t q, i, 
p; 467 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores); 468 469 for (i = 0; i < num_cores; i++) 470 if (lcore_ids[i] == lcore_id) { 471 core_id = i; 472 break; 473 } 474 475 if (remainder != 0) { 476 if (core_id < remainder) { 477 startQueue = (uint16_t)(core_id * 478 (num_vmdq_queues / num_cores + 1)); 479 endQueue = (uint16_t)(startQueue + 480 (num_vmdq_queues / num_cores) + 1); 481 } else { 482 startQueue = (uint16_t)(core_id * 483 (num_vmdq_queues / num_cores) + 484 remainder); 485 endQueue = (uint16_t)(startQueue + 486 (num_vmdq_queues / num_cores)); 487 } 488 } else { 489 startQueue = (uint16_t)(core_id * 490 (num_vmdq_queues / num_cores)); 491 endQueue = (uint16_t)(startQueue + 492 (num_vmdq_queues / num_cores)); 493 } 494 495 /* vmdq queue idx doesn't always start from zero.*/ 496 startQueue += vmdq_queue_base; 497 endQueue += vmdq_queue_base; 498 printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id, 499 (unsigned)lcore_id, startQueue, endQueue - 1); 500 501 if (startQueue == endQueue) { 502 printf("lcore %u has nothing to do\n", lcore_id); 503 return 0; 504 } 505 506 for (;;) { 507 struct rte_mbuf *buf[MAX_PKT_BURST]; 508 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]); 509 510 for (p = 0; p < num_ports; p++) { 511 const uint8_t sport = ports[p]; 512 /* 0 <-> 1, 2 <-> 3 etc */ 513 const uint8_t dport = ports[p ^ 1]; 514 if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID)) 515 continue; 516 517 for (q = startQueue; q < endQueue; q++) { 518 const uint16_t rxCount = rte_eth_rx_burst(sport, 519 q, buf, buf_size); 520 521 if (unlikely(rxCount == 0)) 522 continue; 523 524 rxPackets[q] += rxCount; 525 526 for (i = 0; i < rxCount; i++) 527 update_mac_address(buf[i], dport); 528 529 const uint16_t txCount = rte_eth_tx_burst(dport, 530 vmdq_queue_base + core_id, 531 buf, 532 rxCount); 533 534 if (txCount != rxCount) { 535 for (i = txCount; i < rxCount; i++) 536 rte_pktmbuf_free(buf[i]); 537 } 538 } 539 } 540 } 541 } 
542 543 /* 544 * Update the global var NUM_PORTS and array PORTS according to system ports number 545 * and return valid ports number 546 */ 547 static unsigned check_ports_num(unsigned nb_ports) 548 { 549 unsigned valid_num_ports = num_ports; 550 unsigned portid; 551 552 if (num_ports > nb_ports) { 553 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n", 554 num_ports, nb_ports); 555 num_ports = nb_ports; 556 } 557 558 for (portid = 0; portid < num_ports; portid++) { 559 if (ports[portid] >= nb_ports) { 560 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n", 561 ports[portid], (nb_ports - 1)); 562 ports[portid] = INVALID_PORT_ID; 563 valid_num_ports--; 564 } 565 } 566 return valid_num_ports; 567 } 568 569 /* Main function, does initialisation and calls the per-lcore functions */ 570 int 571 main(int argc, char *argv[]) 572 { 573 struct rte_mempool *mbuf_pool; 574 unsigned lcore_id, core_id = 0; 575 int ret; 576 unsigned nb_ports, valid_num_ports; 577 uint8_t portid; 578 579 signal(SIGHUP, sighup_handler); 580 581 /* init EAL */ 582 ret = rte_eal_init(argc, argv); 583 if (ret < 0) 584 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); 585 argc -= ret; 586 argv += ret; 587 588 /* parse app arguments */ 589 ret = vmdq_parse_args(argc, argv); 590 if (ret < 0) 591 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n"); 592 593 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) 594 if (rte_lcore_is_enabled(lcore_id)) 595 lcore_ids[core_id++] = lcore_id; 596 597 if (rte_lcore_count() > RTE_MAX_LCORE) 598 rte_exit(EXIT_FAILURE, "Not enough cores\n"); 599 600 nb_ports = rte_eth_dev_count(); 601 602 /* 603 * Update the global var NUM_PORTS and global array PORTS 604 * and get value of var VALID_NUM_PORTS according to system ports number 605 */ 606 valid_num_ports = check_ports_num(nb_ports); 607 608 if (valid_num_ports < 2 || valid_num_ports % 2) { 609 printf("Current valid ports number is %u\n", valid_num_ports); 610 
rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n"); 611 } 612 613 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", 614 NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE, 615 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); 616 if (mbuf_pool == NULL) 617 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); 618 619 /* initialize all ports */ 620 for (portid = 0; portid < nb_ports; portid++) { 621 /* skip ports that are not enabled */ 622 if ((enabled_port_mask & (1 << portid)) == 0) { 623 printf("\nSkipping disabled port %d\n", portid); 624 continue; 625 } 626 if (port_init(portid, mbuf_pool) != 0) 627 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n"); 628 } 629 630 /* call lcore_main() on every lcore */ 631 rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER); 632 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 633 if (rte_eal_wait_lcore(lcore_id) < 0) 634 return -1; 635 } 636 637 return 0; 638 } 639