1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2015 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <inttypes.h> 7 #include <getopt.h> 8 #include <rte_eal.h> 9 #include <rte_ethdev.h> 10 #include <rte_cycles.h> 11 #include <rte_lcore.h> 12 #include <rte_mbuf.h> 13 14 #define RX_RING_SIZE 1024 15 #define TX_RING_SIZE 1024 16 17 #define NUM_MBUFS 8191 18 #define MBUF_CACHE_SIZE 250 19 #define BURST_SIZE 32 20 21 static const char usage[] = 22 "%s EAL_ARGS -- [-t]\n"; 23 24 static const struct rte_eth_conf port_conf_default = { 25 .rxmode = { 26 .max_rx_pkt_len = RTE_ETHER_MAX_LEN, 27 }, 28 }; 29 30 static struct { 31 uint64_t total_cycles; 32 uint64_t total_queue_cycles; 33 uint64_t total_pkts; 34 } latency_numbers; 35 36 int hw_timestamping; 37 38 #define TICKS_PER_CYCLE_SHIFT 16 39 static uint64_t ticks_per_cycle_mult; 40 41 static uint16_t 42 add_timestamps(uint16_t port __rte_unused, uint16_t qidx __rte_unused, 43 struct rte_mbuf **pkts, uint16_t nb_pkts, 44 uint16_t max_pkts __rte_unused, void *_ __rte_unused) 45 { 46 unsigned i; 47 uint64_t now = rte_rdtsc(); 48 49 for (i = 0; i < nb_pkts; i++) 50 pkts[i]->udata64 = now; 51 return nb_pkts; 52 } 53 54 static uint16_t 55 calc_latency(uint16_t port, uint16_t qidx __rte_unused, 56 struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused) 57 { 58 uint64_t cycles = 0; 59 uint64_t queue_ticks = 0; 60 uint64_t now = rte_rdtsc(); 61 uint64_t ticks; 62 unsigned i; 63 64 if (hw_timestamping) 65 rte_eth_read_clock(port, &ticks); 66 67 for (i = 0; i < nb_pkts; i++) { 68 cycles += now - pkts[i]->udata64; 69 if (hw_timestamping) 70 queue_ticks += ticks - pkts[i]->timestamp; 71 } 72 73 latency_numbers.total_cycles += cycles; 74 if (hw_timestamping) 75 latency_numbers.total_queue_cycles += (queue_ticks 76 * ticks_per_cycle_mult) >> TICKS_PER_CYCLE_SHIFT; 77 78 latency_numbers.total_pkts += nb_pkts; 79 80 if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) { 81 printf("Latency = %"PRIu64" cycles\n", 82 latency_numbers.total_cycles / latency_numbers.total_pkts); 83 if (hw_timestamping) { 84 printf("Latency from HW = %"PRIu64" cycles\n", 85 latency_numbers.total_queue_cycles 86 / latency_numbers.total_pkts); 87 } 88 latency_numbers.total_cycles = 0; 89 latency_numbers.total_queue_cycles = 0; 90 latency_numbers.total_pkts = 0; 91 } 92 return nb_pkts; 93 } 94 95 /* 96 * Initialises a given port using global settings and with the rx buffers 97 * coming from the mbuf_pool passed as parameter 98 */ 99 static inline int 100 port_init(uint16_t port, struct rte_mempool *mbuf_pool) 101 { 102 struct rte_eth_conf port_conf = port_conf_default; 103 const uint16_t rx_rings = 1, tx_rings = 1; 104 uint16_t nb_rxd = RX_RING_SIZE; 105 uint16_t nb_txd = TX_RING_SIZE; 106 int retval; 107 uint16_t q; 108 struct rte_eth_dev_info dev_info; 109 struct rte_eth_rxconf rxconf; 110 struct rte_eth_txconf txconf; 111 112 if (!rte_eth_dev_is_valid_port(port)) 113 return -1; 114 115 retval = rte_eth_dev_info_get(port, &dev_info); 116 if (retval != 0) { 117 printf("Error during getting device (port %u) info: %s\n", 118 port, strerror(-retval)); 119 120 return retval; 121 } 122 123 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 124 port_conf.txmode.offloads |= 125 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 126 127 if (hw_timestamping) { 128 if (!(dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)) { 129 printf("\nERROR: Port %u does not support hardware timestamping\n" 130 , port); 131 return -1; 132 } 133 port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_TIMESTAMP; 134 } 135 136 retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); 137 if (retval != 0) 138 return retval; 139 140 retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd); 141 if (retval != 0) 142 return retval; 143 144 rxconf = dev_info.default_rxconf; 145 146 for (q = 0; q < rx_rings; q++) { 147 retval = rte_eth_rx_queue_setup(port, q, nb_rxd, 148 rte_eth_dev_socket_id(port), &rxconf, mbuf_pool); 149 if (retval < 0) 150 return retval; 151 } 152 153 txconf = dev_info.default_txconf; 154 txconf.offloads = port_conf.txmode.offloads; 155 for (q = 0; q < tx_rings; q++) { 156 retval = rte_eth_tx_queue_setup(port, q, nb_txd, 157 rte_eth_dev_socket_id(port), &txconf); 158 if (retval < 0) 159 return retval; 160 } 161 162 retval = rte_eth_dev_start(port); 163 if (retval < 0) 164 return retval; 165 166 if (hw_timestamping && ticks_per_cycle_mult == 0) { 167 uint64_t cycles_base = rte_rdtsc(); 168 uint64_t ticks_base; 169 retval = rte_eth_read_clock(port, &ticks_base); 170 if (retval != 0) 171 return retval; 172 rte_delay_ms(100); 173 uint64_t cycles = rte_rdtsc(); 174 uint64_t ticks; 175 rte_eth_read_clock(port, &ticks); 176 uint64_t c_freq = cycles - cycles_base; 177 uint64_t t_freq = ticks - ticks_base; 178 double freq_mult = (double)c_freq / t_freq; 179 printf("TSC Freq ~= %" PRIu64 180 "\nHW Freq ~= %" PRIu64 181 "\nRatio : %f\n", 182 c_freq * 10, t_freq * 10, freq_mult); 183 /* TSC will be faster than internal ticks so freq_mult is > 0 184 * We convert the multiplication to an integer shift & mult 185 */ 186 ticks_per_cycle_mult = (1 << TICKS_PER_CYCLE_SHIFT) / freq_mult; 187 } 188 189 struct rte_ether_addr addr; 190 191 rte_eth_macaddr_get(port, &addr); 192 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 193 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", 194 (unsigned)port, 195 addr.addr_bytes[0], addr.addr_bytes[1], 196 addr.addr_bytes[2], addr.addr_bytes[3], 197 addr.addr_bytes[4], addr.addr_bytes[5]); 198 199 rte_eth_promiscuous_enable(port); 200 rte_eth_add_rx_callback(port, 0, add_timestamps, NULL); 201 rte_eth_add_tx_callback(port, 0, calc_latency, NULL); 202 203 return 0; 204 } 205 206 /* 207 * Main thread that does the work, reading from INPUT_PORT 208 * and writing to OUTPUT_PORT 209 */ 210 static __attribute__((noreturn)) void 211 lcore_main(void) 212 { 213 uint16_t port; 214 215 RTE_ETH_FOREACH_DEV(port) 216 if (rte_eth_dev_socket_id(port) > 0 && 217 rte_eth_dev_socket_id(port) != 218 (int)rte_socket_id()) 219 printf("WARNING, port %u is on remote NUMA node to " 220 "polling thread.\n\tPerformance will " 221 "not be optimal.\n", port); 222 223 printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n", 224 rte_lcore_id()); 225 for (;;) { 226 RTE_ETH_FOREACH_DEV(port) { 227 struct rte_mbuf *bufs[BURST_SIZE]; 228 const uint16_t nb_rx = rte_eth_rx_burst(port, 0, 229 bufs, BURST_SIZE); 230 if (unlikely(nb_rx == 0)) 231 continue; 232 const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0, 233 bufs, nb_rx); 234 if (unlikely(nb_tx < nb_rx)) { 235 uint16_t buf; 236 237 for (buf = nb_tx; buf < nb_rx; buf++) 238 rte_pktmbuf_free(bufs[buf]); 239 } 240 } 241 } 242 } 243 244 /* Main function, does initialisation and calls the per-lcore functions */ 245 int 246 main(int argc, char *argv[]) 247 { 248 struct rte_mempool *mbuf_pool; 249 uint16_t nb_ports; 250 uint16_t portid; 251 struct option lgopts[] = { 252 { NULL, 0, 0, 0 } 253 }; 254 int opt, option_index; 255 256 257 /* init EAL */ 258 int ret = rte_eal_init(argc, argv); 259 260 if (ret < 0) 261 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); 262 argc -= ret; 263 argv += ret; 264 265 while ((opt = getopt_long(argc, argv, "t", lgopts, &option_index)) 266 != EOF) 267 switch (opt) { 268 case 't': 269 hw_timestamping = 1; 270 break; 271 default: 272 printf(usage, argv[0]); 273 return -1; 274 } 275 optind = 1; /* reset getopt lib */ 276 277 nb_ports = rte_eth_dev_count_avail(); 278 if (nb_ports < 2 || (nb_ports & 1)) 279 rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n"); 280 281 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", 282 NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0, 283 RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); 284 if (mbuf_pool == NULL) 285 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); 286 287 /* initialize all ports */ 288 RTE_ETH_FOREACH_DEV(portid) 289 if (port_init(portid, mbuf_pool) != 0) 290 rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n", 291 portid); 292 293 if (rte_lcore_count() > 1) 294 printf("\nWARNING: Too much enabled lcores - " 295 "App uses only 1 lcore\n"); 296 297 /* call lcore_main on master core only */ 298 lcore_main(); 299 return 0; 300 } 301