1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2015 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <inttypes.h> 7 #include <getopt.h> 8 #include <rte_eal.h> 9 #include <rte_ethdev.h> 10 #include <rte_cycles.h> 11 #include <rte_lcore.h> 12 #include <rte_mbuf.h> 13 #include <rte_mbuf_dyn.h> 14 15 #define RX_RING_SIZE 1024 16 #define TX_RING_SIZE 1024 17 18 #define NUM_MBUFS 8191 19 #define MBUF_CACHE_SIZE 250 20 #define BURST_SIZE 32 21 22 typedef uint64_t tsc_t; 23 static int tsc_dynfield_offset = -1; 24 25 static inline tsc_t * 26 tsc_field(struct rte_mbuf *mbuf) 27 { 28 return RTE_MBUF_DYNFIELD(mbuf, tsc_dynfield_offset, tsc_t *); 29 } 30 31 static const char usage[] = 32 "%s EAL_ARGS -- [-t]\n"; 33 34 static const struct rte_eth_conf port_conf_default = { 35 .rxmode = { 36 .max_rx_pkt_len = RTE_ETHER_MAX_LEN, 37 }, 38 }; 39 40 static struct { 41 uint64_t total_cycles; 42 uint64_t total_queue_cycles; 43 uint64_t total_pkts; 44 } latency_numbers; 45 46 int hw_timestamping; 47 48 #define TICKS_PER_CYCLE_SHIFT 16 49 static uint64_t ticks_per_cycle_mult; 50 51 static uint16_t 52 add_timestamps(uint16_t port __rte_unused, uint16_t qidx __rte_unused, 53 struct rte_mbuf **pkts, uint16_t nb_pkts, 54 uint16_t max_pkts __rte_unused, void *_ __rte_unused) 55 { 56 unsigned i; 57 uint64_t now = rte_rdtsc(); 58 59 for (i = 0; i < nb_pkts; i++) 60 *tsc_field(pkts[i]) = now; 61 return nb_pkts; 62 } 63 64 static uint16_t 65 calc_latency(uint16_t port, uint16_t qidx __rte_unused, 66 struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused) 67 { 68 uint64_t cycles = 0; 69 uint64_t queue_ticks = 0; 70 uint64_t now = rte_rdtsc(); 71 uint64_t ticks; 72 unsigned i; 73 74 if (hw_timestamping) 75 rte_eth_read_clock(port, &ticks); 76 77 for (i = 0; i < nb_pkts; i++) { 78 cycles += now - *tsc_field(pkts[i]); 79 if (hw_timestamping) 80 queue_ticks += ticks - pkts[i]->timestamp; 81 } 82 83 latency_numbers.total_cycles += cycles; 84 if (hw_timestamping) 85 latency_numbers.total_queue_cycles += (queue_ticks 86 * ticks_per_cycle_mult) >> TICKS_PER_CYCLE_SHIFT; 87 88 latency_numbers.total_pkts += nb_pkts; 89 90 if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) { 91 printf("Latency = %"PRIu64" cycles\n", 92 latency_numbers.total_cycles / latency_numbers.total_pkts); 93 if (hw_timestamping) { 94 printf("Latency from HW = %"PRIu64" cycles\n", 95 latency_numbers.total_queue_cycles 96 / latency_numbers.total_pkts); 97 } 98 latency_numbers.total_cycles = 0; 99 latency_numbers.total_queue_cycles = 0; 100 latency_numbers.total_pkts = 0; 101 } 102 return nb_pkts; 103 } 104 105 /* 106 * Initialises a given port using global settings and with the rx buffers 107 * coming from the mbuf_pool passed as parameter 108 */ 109 static inline int 110 port_init(uint16_t port, struct rte_mempool *mbuf_pool) 111 { 112 struct rte_eth_conf port_conf = port_conf_default; 113 const uint16_t rx_rings = 1, tx_rings = 1; 114 uint16_t nb_rxd = RX_RING_SIZE; 115 uint16_t nb_txd = TX_RING_SIZE; 116 int retval; 117 uint16_t q; 118 struct rte_eth_dev_info dev_info; 119 struct rte_eth_rxconf rxconf; 120 struct rte_eth_txconf txconf; 121 122 if (!rte_eth_dev_is_valid_port(port)) 123 return -1; 124 125 retval = rte_eth_dev_info_get(port, &dev_info); 126 if (retval != 0) { 127 printf("Error during getting device (port %u) info: %s\n", 128 port, strerror(-retval)); 129 130 return retval; 131 } 132 133 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 134 port_conf.txmode.offloads |= 135 DEV_TX_OFFLOAD_MBUF_FAST_FREE; 136 137 if (hw_timestamping) { 138 if (!(dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)) { 139 printf("\nERROR: Port %u does not support hardware timestamping\n" 140 , port); 141 return -1; 142 } 143 port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_TIMESTAMP; 144 } 145 146 retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); 147 if (retval != 0) 148 return retval; 149 150 retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd); 151 if (retval != 0) 152 return retval; 153 154 rxconf = dev_info.default_rxconf; 155 156 for (q = 0; q < rx_rings; q++) { 157 retval = rte_eth_rx_queue_setup(port, q, nb_rxd, 158 rte_eth_dev_socket_id(port), &rxconf, mbuf_pool); 159 if (retval < 0) 160 return retval; 161 } 162 163 txconf = dev_info.default_txconf; 164 txconf.offloads = port_conf.txmode.offloads; 165 for (q = 0; q < tx_rings; q++) { 166 retval = rte_eth_tx_queue_setup(port, q, nb_txd, 167 rte_eth_dev_socket_id(port), &txconf); 168 if (retval < 0) 169 return retval; 170 } 171 172 retval = rte_eth_dev_start(port); 173 if (retval < 0) 174 return retval; 175 176 if (hw_timestamping && ticks_per_cycle_mult == 0) { 177 uint64_t cycles_base = rte_rdtsc(); 178 uint64_t ticks_base; 179 retval = rte_eth_read_clock(port, &ticks_base); 180 if (retval != 0) 181 return retval; 182 rte_delay_ms(100); 183 uint64_t cycles = rte_rdtsc(); 184 uint64_t ticks; 185 rte_eth_read_clock(port, &ticks); 186 uint64_t c_freq = cycles - cycles_base; 187 uint64_t t_freq = ticks - ticks_base; 188 double freq_mult = (double)c_freq / t_freq; 189 printf("TSC Freq ~= %" PRIu64 190 "\nHW Freq ~= %" PRIu64 191 "\nRatio : %f\n", 192 c_freq * 10, t_freq * 10, freq_mult); 193 /* TSC will be faster than internal ticks so freq_mult is > 0 194 * We convert the multiplication to an integer shift & mult 195 */ 196 ticks_per_cycle_mult = (1 << TICKS_PER_CYCLE_SHIFT) / freq_mult; 197 } 198 199 struct rte_ether_addr addr; 200 201 retval = rte_eth_macaddr_get(port, &addr); 202 if (retval < 0) { 203 printf("Failed to get MAC address on port %u: %s\n", 204 port, rte_strerror(-retval)); 205 return retval; 206 } 207 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 208 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", 209 (unsigned)port, 210 addr.addr_bytes[0], addr.addr_bytes[1], 211 addr.addr_bytes[2], addr.addr_bytes[3], 212 addr.addr_bytes[4], addr.addr_bytes[5]); 213 214 retval = rte_eth_promiscuous_enable(port); 215 if (retval != 0) 216 return retval; 217 218 rte_eth_add_rx_callback(port, 0, add_timestamps, NULL); 219 rte_eth_add_tx_callback(port, 0, calc_latency, NULL); 220 221 return 0; 222 } 223 224 /* 225 * Main thread that does the work, reading from INPUT_PORT 226 * and writing to OUTPUT_PORT 227 */ 228 static __rte_noreturn void 229 lcore_main(void) 230 { 231 uint16_t port; 232 233 RTE_ETH_FOREACH_DEV(port) 234 if (rte_eth_dev_socket_id(port) > 0 && 235 rte_eth_dev_socket_id(port) != 236 (int)rte_socket_id()) 237 printf("WARNING, port %u is on remote NUMA node to " 238 "polling thread.\n\tPerformance will " 239 "not be optimal.\n", port); 240 241 printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n", 242 rte_lcore_id()); 243 for (;;) { 244 RTE_ETH_FOREACH_DEV(port) { 245 struct rte_mbuf *bufs[BURST_SIZE]; 246 const uint16_t nb_rx = rte_eth_rx_burst(port, 0, 247 bufs, BURST_SIZE); 248 if (unlikely(nb_rx == 0)) 249 continue; 250 const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0, 251 bufs, nb_rx); 252 if (unlikely(nb_tx < nb_rx)) { 253 uint16_t buf; 254 255 for (buf = nb_tx; buf < nb_rx; buf++) 256 rte_pktmbuf_free(bufs[buf]); 257 } 258 } 259 } 260 } 261 262 /* Main function, does initialisation and calls the per-lcore functions */ 263 int 264 main(int argc, char *argv[]) 265 { 266 struct rte_mempool *mbuf_pool; 267 uint16_t nb_ports; 268 uint16_t portid; 269 struct option lgopts[] = { 270 { NULL, 0, 0, 0 } 271 }; 272 int opt, option_index; 273 274 static const struct rte_mbuf_dynfield tsc_dynfield_desc = { 275 .name = "example_bbdev_dynfield_tsc", 276 .size = sizeof(tsc_t), 277 .align = __alignof__(tsc_t), 278 }; 279 280 /* init EAL */ 281 int ret = rte_eal_init(argc, argv); 282 283 if (ret < 0) 284 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); 285 argc -= ret; 286 argv += ret; 287 288 while ((opt = getopt_long(argc, argv, "t", lgopts, &option_index)) 289 != EOF) 290 switch (opt) { 291 case 't': 292 hw_timestamping = 1; 293 break; 294 default: 295 printf(usage, argv[0]); 296 return -1; 297 } 298 optind = 1; /* reset getopt lib */ 299 300 nb_ports = rte_eth_dev_count_avail(); 301 if (nb_ports < 2 || (nb_ports & 1)) 302 rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n"); 303 304 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", 305 NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0, 306 RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); 307 if (mbuf_pool == NULL) 308 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); 309 310 tsc_dynfield_offset = 311 rte_mbuf_dynfield_register(&tsc_dynfield_desc); 312 if (tsc_dynfield_offset < 0) 313 rte_exit(EXIT_FAILURE, "Cannot register mbuf field\n"); 314 315 /* initialize all ports */ 316 RTE_ETH_FOREACH_DEV(portid) 317 if (port_init(portid, mbuf_pool) != 0) 318 rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n", 319 portid); 320 321 if (rte_lcore_count() > 1) 322 printf("\nWARNING: Too much enabled lcores - " 323 "App uses only 1 lcore\n"); 324 325 /* call lcore_main on main core only */ 326 lcore_main(); 327 return 0; 328 } 329