1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stdint.h> 35 #include <inttypes.h> 36 #include <unistd.h> 37 #include <signal.h> 38 #include <getopt.h> 39 40 #include <rte_eal.h> 41 #include <rte_ethdev.h> 42 #include <rte_cycles.h> 43 #include <rte_malloc.h> 44 #include <rte_debug.h> 45 #include <rte_distributor.h> 46 47 #define RX_RING_SIZE 256 48 #define TX_RING_SIZE 512 49 #define NUM_MBUFS ((64*1024)-1) 50 #define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) 51 #define MBUF_CACHE_SIZE 250 52 #define BURST_SIZE 32 53 #define RTE_RING_SZ 1024 54 55 /* uncommnet below line to enable debug logs */ 56 /* #define DEBUG */ 57 58 #ifdef DEBUG 59 #define LOG_LEVEL RTE_LOG_DEBUG 60 #define LOG_DEBUG(log_type, fmt, args...) do { \ 61 RTE_LOG(DEBUG, log_type, fmt, ##args) \ 62 } while (0) 63 #else 64 #define LOG_LEVEL RTE_LOG_INFO 65 #define LOG_DEBUG(log_type, fmt, args...) do {} while (0) 66 #endif 67 68 #define RTE_LOGTYPE_DISTRAPP RTE_LOGTYPE_USER1 69 70 /* mask of enabled ports */ 71 static uint32_t enabled_port_mask; 72 volatile uint8_t quit_signal; 73 volatile uint8_t quit_signal_rx; 74 75 static volatile struct app_stats { 76 struct { 77 uint64_t rx_pkts; 78 uint64_t returned_pkts; 79 uint64_t enqueued_pkts; 80 } rx __rte_cache_aligned; 81 82 struct { 83 uint64_t dequeue_pkts; 84 uint64_t tx_pkts; 85 } tx __rte_cache_aligned; 86 } app_stats; 87 88 static const struct rte_eth_conf port_conf_default = { 89 .rxmode = { 90 .mq_mode = ETH_MQ_RX_RSS, 91 .max_rx_pkt_len = ETHER_MAX_LEN, 92 }, 93 .txmode = { 94 .mq_mode = ETH_MQ_TX_NONE, 95 }, 96 .rx_adv_conf = { 97 .rss_conf = { 98 .rss_hf = ETH_RSS_IP | ETH_RSS_UDP | 99 ETH_RSS_TCP | ETH_RSS_SCTP, 100 } 101 }, 102 }; 103 104 struct output_buffer { 105 unsigned count; 106 struct rte_mbuf *mbufs[BURST_SIZE]; 107 }; 108 109 /* 110 * Initialises a given port using global settings and with the rx buffers 111 * coming from the mbuf_pool passed as parameter 112 */ 113 static inline int 114 port_init(uint8_t port, struct rte_mempool *mbuf_pool) 115 { 116 struct rte_eth_conf port_conf = port_conf_default; 117 const uint16_t rxRings = 1, txRings = rte_lcore_count() - 1; 118 int retval; 119 uint16_t q; 120 121 if (port >= rte_eth_dev_count()) 122 return -1; 123 124 retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf); 125 if (retval != 0) 126 return retval; 127 128 for (q = 0; q < rxRings; q++) { 129 retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE, 130 rte_eth_dev_socket_id(port), 131 NULL, mbuf_pool); 132 if (retval < 0) 133 return retval; 134 } 135 136 for (q = 0; q < txRings; q++) { 137 retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE, 138 rte_eth_dev_socket_id(port), 139 NULL); 140 if (retval < 0) 141 return retval; 142 } 143 144 retval = rte_eth_dev_start(port); 145 if (retval < 0) 146 return retval; 147 148 struct rte_eth_link link; 149 rte_eth_link_get_nowait(port, &link); 150 if (!link.link_status) { 151 sleep(1); 152 rte_eth_link_get_nowait(port, &link); 153 } 154 155 if (!link.link_status) { 156 printf("Link down on port %"PRIu8"\n", port); 157 return 0; 158 } 159 160 struct ether_addr addr; 161 rte_eth_macaddr_get(port, &addr); 162 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 163 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", 164 (unsigned)port, 165 addr.addr_bytes[0], addr.addr_bytes[1], 166 addr.addr_bytes[2], addr.addr_bytes[3], 167 addr.addr_bytes[4], addr.addr_bytes[5]); 168 169 rte_eth_promiscuous_enable(port); 170 171 return 0; 172 } 173 174 struct lcore_params { 175 unsigned worker_id; 176 struct rte_distributor *d; 177 struct rte_ring *r; 178 struct rte_mempool *mem_pool; 179 }; 180 181 static void 182 quit_workers(struct rte_distributor *d, struct rte_mempool *p) 183 { 184 const unsigned num_workers = rte_lcore_count() - 2; 185 unsigned i; 186 struct rte_mbuf *bufs[num_workers]; 187 rte_mempool_get_bulk(p, (void *)bufs, num_workers); 188 189 for (i = 0; i < num_workers; i++) 190 bufs[i]->hash.rss = i << 1; 191 192 rte_distributor_process(d, bufs, num_workers); 193 rte_mempool_put_bulk(p, (void *)bufs, num_workers); 194 } 195 196 static int 197 lcore_rx(struct lcore_params *p) 198 { 199 struct rte_distributor *d = p->d; 200 struct rte_mempool *mem_pool = p->mem_pool; 201 struct rte_ring *r = p->r; 202 const uint8_t nb_ports = rte_eth_dev_count(); 203 const int socket_id = rte_socket_id(); 204 uint8_t port; 205 206 for (port = 0; port < nb_ports; port++) { 207 /* skip ports that are not enabled */ 208 if ((enabled_port_mask & (1 << port)) == 0) 209 continue; 210 211 if (rte_eth_dev_socket_id(port) > 0 && 212 rte_eth_dev_socket_id(port) != socket_id) 213 printf("WARNING, port %u is on remote NUMA node to " 214 "RX thread.\n\tPerformance will not " 215 "be optimal.\n", port); 216 } 217 218 printf("\nCore %u doing packet RX.\n", rte_lcore_id()); 219 port = 0; 220 while (!quit_signal_rx) { 221 222 /* skip ports that are not enabled */ 223 if ((enabled_port_mask & (1 << port)) == 0) { 224 if (++port == nb_ports) 225 port = 0; 226 continue; 227 } 228 struct rte_mbuf *bufs[BURST_SIZE*2]; 229 const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs, 230 BURST_SIZE); 231 app_stats.rx.rx_pkts += nb_rx; 232 233 rte_distributor_process(d, bufs, nb_rx); 234 const uint16_t nb_ret = rte_distributor_returned_pkts(d, 235 bufs, BURST_SIZE*2); 236 app_stats.rx.returned_pkts += nb_ret; 237 if (unlikely(nb_ret == 0)) 238 continue; 239 240 uint16_t sent = rte_ring_enqueue_burst(r, (void *)bufs, nb_ret); 241 app_stats.rx.enqueued_pkts += sent; 242 if (unlikely(sent < nb_ret)) { 243 LOG_DEBUG(DISTRAPP, "%s:Packet loss due to full ring\n", __func__); 244 while (sent < nb_ret) 245 rte_pktmbuf_free(bufs[sent++]); 246 } 247 if (++port == nb_ports) 248 port = 0; 249 } 250 rte_distributor_process(d, NULL, 0); 251 /* flush distributor to bring to known state */ 252 rte_distributor_flush(d); 253 /* set worker & tx threads quit flag */ 254 quit_signal = 1; 255 /* 256 * worker threads may hang in get packet as 257 * distributor process is not running, just make sure workers 258 * get packets till quit_signal is actually been 259 * received and they gracefully shutdown 260 */ 261 quit_workers(d, mem_pool); 262 /* rx thread should quit at last */ 263 return 0; 264 } 265 266 static inline void 267 flush_one_port(struct output_buffer *outbuf, uint8_t outp) 268 { 269 unsigned nb_tx = rte_eth_tx_burst(outp, 0, outbuf->mbufs, 270 outbuf->count); 271 app_stats.tx.tx_pkts += nb_tx; 272 273 if (unlikely(nb_tx < outbuf->count)) { 274 LOG_DEBUG(DISTRAPP, "%s:Packet loss with tx_burst\n", __func__); 275 do { 276 rte_pktmbuf_free(outbuf->mbufs[nb_tx]); 277 } while (++nb_tx < outbuf->count); 278 } 279 outbuf->count = 0; 280 } 281 282 static inline void 283 flush_all_ports(struct output_buffer *tx_buffers, uint8_t nb_ports) 284 { 285 uint8_t outp; 286 for (outp = 0; outp < nb_ports; outp++) { 287 /* skip ports that are not enabled */ 288 if ((enabled_port_mask & (1 << outp)) == 0) 289 continue; 290 291 if (tx_buffers[outp].count == 0) 292 continue; 293 294 flush_one_port(&tx_buffers[outp], outp); 295 } 296 } 297 298 static int 299 lcore_tx(struct rte_ring *in_r) 300 { 301 static struct output_buffer tx_buffers[RTE_MAX_ETHPORTS]; 302 const uint8_t nb_ports = rte_eth_dev_count(); 303 const int socket_id = rte_socket_id(); 304 uint8_t port; 305 306 for (port = 0; port < nb_ports; port++) { 307 /* skip ports that are not enabled */ 308 if ((enabled_port_mask & (1 << port)) == 0) 309 continue; 310 311 if (rte_eth_dev_socket_id(port) > 0 && 312 rte_eth_dev_socket_id(port) != socket_id) 313 printf("WARNING, port %u is on remote NUMA node to " 314 "TX thread.\n\tPerformance will not " 315 "be optimal.\n", port); 316 } 317 318 printf("\nCore %u doing packet TX.\n", rte_lcore_id()); 319 while (!quit_signal) { 320 321 for (port = 0; port < nb_ports; port++) { 322 /* skip ports that are not enabled */ 323 if ((enabled_port_mask & (1 << port)) == 0) 324 continue; 325 326 struct rte_mbuf *bufs[BURST_SIZE]; 327 const uint16_t nb_rx = rte_ring_dequeue_burst(in_r, 328 (void *)bufs, BURST_SIZE); 329 app_stats.tx.dequeue_pkts += nb_rx; 330 331 /* if we get no traffic, flush anything we have */ 332 if (unlikely(nb_rx == 0)) { 333 flush_all_ports(tx_buffers, nb_ports); 334 continue; 335 } 336 337 /* for traffic we receive, queue it up for transmit */ 338 uint16_t i; 339 _mm_prefetch(bufs[0], 0); 340 _mm_prefetch(bufs[1], 0); 341 _mm_prefetch(bufs[2], 0); 342 for (i = 0; i < nb_rx; i++) { 343 struct output_buffer *outbuf; 344 uint8_t outp; 345 _mm_prefetch(bufs[i + 3], 0); 346 /* 347 * workers should update in_port to hold the 348 * output port value 349 */ 350 outp = bufs[i]->port; 351 /* skip ports that are not enabled */ 352 if ((enabled_port_mask & (1 << outp)) == 0) 353 continue; 354 355 outbuf = &tx_buffers[outp]; 356 outbuf->mbufs[outbuf->count++] = bufs[i]; 357 if (outbuf->count == BURST_SIZE) 358 flush_one_port(outbuf, outp); 359 } 360 } 361 } 362 return 0; 363 } 364 365 static void 366 int_handler(int sig_num) 367 { 368 printf("Exiting on signal %d\n", sig_num); 369 /* set quit flag for rx thread to exit */ 370 quit_signal_rx = 1; 371 } 372 373 static void 374 print_stats(void) 375 { 376 struct rte_eth_stats eth_stats; 377 unsigned i; 378 379 printf("\nRX thread stats:\n"); 380 printf(" - Received: %"PRIu64"\n", app_stats.rx.rx_pkts); 381 printf(" - Processed: %"PRIu64"\n", app_stats.rx.returned_pkts); 382 printf(" - Enqueued: %"PRIu64"\n", app_stats.rx.enqueued_pkts); 383 384 printf("\nTX thread stats:\n"); 385 printf(" - Dequeued: %"PRIu64"\n", app_stats.tx.dequeue_pkts); 386 printf(" - Transmitted: %"PRIu64"\n", app_stats.tx.tx_pkts); 387 388 for (i = 0; i < rte_eth_dev_count(); i++) { 389 rte_eth_stats_get(i, ð_stats); 390 printf("\nPort %u stats:\n", i); 391 printf(" - Pkts in: %"PRIu64"\n", eth_stats.ipackets); 392 printf(" - Pkts out: %"PRIu64"\n", eth_stats.opackets); 393 printf(" - In Errs: %"PRIu64"\n", eth_stats.ierrors); 394 printf(" - Out Errs: %"PRIu64"\n", eth_stats.oerrors); 395 printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf); 396 } 397 } 398 399 static int 400 lcore_worker(struct lcore_params *p) 401 { 402 struct rte_distributor *d = p->d; 403 const unsigned id = p->worker_id; 404 /* 405 * for single port, xor_val will be zero so we won't modify the output 406 * port, otherwise we send traffic from 0 to 1, 2 to 3, and vice versa 407 */ 408 const unsigned xor_val = (rte_eth_dev_count() > 1); 409 struct rte_mbuf *buf = NULL; 410 411 printf("\nCore %u acting as worker core.\n", rte_lcore_id()); 412 while (!quit_signal) { 413 buf = rte_distributor_get_pkt(d, id, buf); 414 buf->port ^= xor_val; 415 } 416 return 0; 417 } 418 419 /* display usage */ 420 static void 421 print_usage(const char *prgname) 422 { 423 printf("%s [EAL options] -- -p PORTMASK\n" 424 " -p PORTMASK: hexadecimal bitmask of ports to configure\n", 425 prgname); 426 } 427 428 static int 429 parse_portmask(const char *portmask) 430 { 431 char *end = NULL; 432 unsigned long pm; 433 434 /* parse hexadecimal string */ 435 pm = strtoul(portmask, &end, 16); 436 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) 437 return -1; 438 439 if (pm == 0) 440 return -1; 441 442 return pm; 443 } 444 445 /* Parse the argument given in the command line of the application */ 446 static int 447 parse_args(int argc, char **argv) 448 { 449 int opt; 450 char **argvopt; 451 int option_index; 452 char *prgname = argv[0]; 453 static struct option lgopts[] = { 454 {NULL, 0, 0, 0} 455 }; 456 457 argvopt = argv; 458 459 while ((opt = getopt_long(argc, argvopt, "p:", 460 lgopts, &option_index)) != EOF) { 461 462 switch (opt) { 463 /* portmask */ 464 case 'p': 465 enabled_port_mask = parse_portmask(optarg); 466 if (enabled_port_mask == 0) { 467 printf("invalid portmask\n"); 468 print_usage(prgname); 469 return -1; 470 } 471 break; 472 473 default: 474 print_usage(prgname); 475 return -1; 476 } 477 } 478 479 if (optind <= 1) { 480 print_usage(prgname); 481 return -1; 482 } 483 484 argv[optind-1] = prgname; 485 486 optind = 0; /* reset getopt lib */ 487 return 0; 488 } 489 490 /* Main function, does initialization and calls the per-lcore functions */ 491 int 492 main(int argc, char *argv[]) 493 { 494 struct rte_mempool *mbuf_pool; 495 struct rte_distributor *d; 496 struct rte_ring *output_ring; 497 unsigned lcore_id, worker_id = 0; 498 unsigned nb_ports; 499 uint8_t portid; 500 uint8_t nb_ports_available; 501 502 /* catch ctrl-c so we can print on exit */ 503 signal(SIGINT, int_handler); 504 505 /* init EAL */ 506 int ret = rte_eal_init(argc, argv); 507 if (ret < 0) 508 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); 509 argc -= ret; 510 argv += ret; 511 512 /* parse application arguments (after the EAL ones) */ 513 ret = parse_args(argc, argv); 514 if (ret < 0) 515 rte_exit(EXIT_FAILURE, "Invalid distributor parameters\n"); 516 517 if (rte_lcore_count() < 3) 518 rte_exit(EXIT_FAILURE, "Error, This application needs at " 519 "least 3 logical cores to run:\n" 520 "1 lcore for packet RX and distribution\n" 521 "1 lcore for packet TX\n" 522 "and at least 1 lcore for worker threads\n"); 523 524 nb_ports = rte_eth_dev_count(); 525 if (nb_ports == 0) 526 rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n"); 527 if (nb_ports != 1 && (nb_ports & 1)) 528 rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except " 529 "when using a single port\n"); 530 531 mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS * nb_ports, 532 MBUF_SIZE, MBUF_CACHE_SIZE, 533 sizeof(struct rte_pktmbuf_pool_private), 534 rte_pktmbuf_pool_init, NULL, 535 rte_pktmbuf_init, NULL, 536 rte_socket_id(), 0); 537 if (mbuf_pool == NULL) 538 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); 539 nb_ports_available = nb_ports; 540 541 /* initialize all ports */ 542 for (portid = 0; portid < nb_ports; portid++) { 543 /* skip ports that are not enabled */ 544 if ((enabled_port_mask & (1 << portid)) == 0) { 545 printf("\nSkipping disabled port %d\n", portid); 546 nb_ports_available--; 547 continue; 548 } 549 /* init port */ 550 printf("Initializing port %u... done\n", (unsigned) portid); 551 552 if (port_init(portid, mbuf_pool) != 0) 553 rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n", 554 portid); 555 } 556 557 if (!nb_ports_available) { 558 rte_exit(EXIT_FAILURE, 559 "All available ports are disabled. Please set portmask.\n"); 560 } 561 562 d = rte_distributor_create("PKT_DIST", rte_socket_id(), 563 rte_lcore_count() - 2); 564 if (d == NULL) 565 rte_exit(EXIT_FAILURE, "Cannot create distributor\n"); 566 567 /* 568 * scheduler ring is read only by the transmitter core, but written to 569 * by multiple threads 570 */ 571 output_ring = rte_ring_create("Output_ring", RTE_RING_SZ, 572 rte_socket_id(), RING_F_SC_DEQ); 573 if (output_ring == NULL) 574 rte_exit(EXIT_FAILURE, "Cannot create output ring\n"); 575 576 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 577 if (worker_id == rte_lcore_count() - 2) 578 rte_eal_remote_launch((lcore_function_t *)lcore_tx, 579 output_ring, lcore_id); 580 else { 581 struct lcore_params *p = 582 rte_malloc(NULL, sizeof(*p), 0); 583 if (!p) 584 rte_panic("malloc failure\n"); 585 *p = (struct lcore_params){worker_id, d, output_ring, mbuf_pool}; 586 587 rte_eal_remote_launch((lcore_function_t *)lcore_worker, 588 p, lcore_id); 589 } 590 worker_id++; 591 } 592 /* call lcore_main on master core only */ 593 struct lcore_params p = { 0, d, output_ring, mbuf_pool}; 594 lcore_rx(&p); 595 596 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 597 if (rte_eal_wait_lcore(lcore_id) < 0) 598 return -1; 599 } 600 601 print_stats(); 602 return 0; 603 } 604