/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
4a9de470cSBruce Richardson
5a9de470cSBruce Richardson #include "test.h"
6a9de470cSBruce Richardson
7a9de470cSBruce Richardson #include <unistd.h>
8a9de470cSBruce Richardson #include <string.h>
9a9de470cSBruce Richardson #include <rte_mempool.h>
10a9de470cSBruce Richardson #include <rte_cycles.h>
11a9de470cSBruce Richardson #include <rte_common.h>
12a9de470cSBruce Richardson #include <rte_mbuf.h>
133c60274cSJie Zhou
143c60274cSJie Zhou #ifdef RTE_EXEC_ENV_WINDOWS
153c60274cSJie Zhou static int
test_distributor_perf(void)163c60274cSJie Zhou test_distributor_perf(void)
173c60274cSJie Zhou {
183c60274cSJie Zhou printf("distributor perf not supported on Windows, skipping test\n");
193c60274cSJie Zhou return TEST_SKIPPED;
203c60274cSJie Zhou }
213c60274cSJie Zhou
223c60274cSJie Zhou #else
233c60274cSJie Zhou
24a9de470cSBruce Richardson #include <rte_distributor.h>
25a9de470cSBruce Richardson #include <rte_pause.h>
26a9de470cSBruce Richardson
27a9de470cSBruce Richardson #define ITER_POWER_CL 25 /* log 2 of how many iterations for Cache Line test */
28a9de470cSBruce Richardson #define ITER_POWER 21 /* log 2 of how many iterations we do when timing. */
29a9de470cSBruce Richardson #define BURST 64
30a9de470cSBruce Richardson #define BIG_BATCH 1024
31a9de470cSBruce Richardson
32a9de470cSBruce Richardson /* static vars - zero initialized by default */
33a9de470cSBruce Richardson static volatile int quit;
34*b6a7e685STyler Retzlaff static volatile RTE_ATOMIC(unsigned int) worker_idx;
35a9de470cSBruce Richardson
360efea35aSTyler Retzlaff struct __rte_cache_aligned worker_stats {
37a9de470cSBruce Richardson volatile unsigned handled_packets;
380efea35aSTyler Retzlaff };
39d1705276SFerruh Yigit static struct worker_stats worker_stats[RTE_MAX_LCORE];
40a9de470cSBruce Richardson
/*
 * Worker thread used for testing the time to do a round-trip of a cache
 * line between two cores and back again.
 *
 * arg points at a uint64_t shared with the launching core. Whenever that
 * word becomes non-zero the worker records the value and writes zero back;
 * it stops once the recorded value is 2.
 *
 * The parameter is a plain void * so that the function has the same type as
 * the other lcore entry point in this file (handle_work): it is invoked
 * through an lcore_function_t pointer, and calling a function through a
 * pointer to an incompatible function type is undefined behaviour in C.
 *
 * Always returns 0.
 */
static int
flip_bit(void *arg)
{
	volatile uint64_t *flag = arg;
	uint64_t old_val = 0;

	while (old_val != 2) {
		/* spin until the other core stores a non-zero value */
		while (!*flag)
			rte_pause();
		old_val = *flag;
		*flag = 0;	/* hand the word back */
	}
	return 0;
}
57a9de470cSBruce Richardson
58a9de470cSBruce Richardson /*
59a9de470cSBruce Richardson * test case to time the number of cycles to round-trip a cache line between
60a9de470cSBruce Richardson * two cores and back again.
61a9de470cSBruce Richardson */
62a9de470cSBruce Richardson static void
time_cache_line_switch(void)63a9de470cSBruce Richardson time_cache_line_switch(void)
64a9de470cSBruce Richardson {
65a9de470cSBruce Richardson /* allocate a full cache line for data, we use only first byte of it */
66a9de470cSBruce Richardson uint64_t data[RTE_CACHE_LINE_SIZE*3 / sizeof(uint64_t)];
67a9de470cSBruce Richardson
68cb056611SStephen Hemminger unsigned int i, workerid = rte_get_next_lcore(rte_lcore_id(), 0, 0);
69a9de470cSBruce Richardson volatile uint64_t *pdata = &data[0];
70a9de470cSBruce Richardson *pdata = 1;
71cb056611SStephen Hemminger rte_eal_remote_launch((lcore_function_t *)flip_bit, &data[0], workerid);
72a9de470cSBruce Richardson while (*pdata)
73a9de470cSBruce Richardson rte_pause();
74a9de470cSBruce Richardson
75a9de470cSBruce Richardson const uint64_t start_time = rte_rdtsc();
76a9de470cSBruce Richardson for (i = 0; i < (1 << ITER_POWER_CL); i++) {
77a9de470cSBruce Richardson while (*pdata)
78a9de470cSBruce Richardson rte_pause();
79a9de470cSBruce Richardson *pdata = 1;
80a9de470cSBruce Richardson }
81a9de470cSBruce Richardson const uint64_t end_time = rte_rdtsc();
82a9de470cSBruce Richardson
83a9de470cSBruce Richardson while (*pdata)
84a9de470cSBruce Richardson rte_pause();
85a9de470cSBruce Richardson *pdata = 2;
86cb056611SStephen Hemminger rte_eal_wait_lcore(workerid);
87a9de470cSBruce Richardson printf("==== Cache line switch test ===\n");
88a9de470cSBruce Richardson printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER_CL),
89a9de470cSBruce Richardson end_time-start_time);
90a9de470cSBruce Richardson printf("Ticks per iteration = %"PRIu64"\n\n",
91a9de470cSBruce Richardson (end_time-start_time) >> ITER_POWER_CL);
92a9de470cSBruce Richardson }
93a9de470cSBruce Richardson
94a9de470cSBruce Richardson /*
95a9de470cSBruce Richardson * returns the total count of the number of packets handled by the worker
96a9de470cSBruce Richardson * functions given below.
97a9de470cSBruce Richardson */
98a9de470cSBruce Richardson static unsigned
total_packet_count(void)99a9de470cSBruce Richardson total_packet_count(void)
100a9de470cSBruce Richardson {
101a9de470cSBruce Richardson unsigned i, count = 0;
102a9de470cSBruce Richardson for (i = 0; i < worker_idx; i++)
103a9de470cSBruce Richardson count += worker_stats[i].handled_packets;
104a9de470cSBruce Richardson return count;
105a9de470cSBruce Richardson }
106a9de470cSBruce Richardson
107a9de470cSBruce Richardson /* resets the packet counts for a new test */
108a9de470cSBruce Richardson static void
clear_packet_count(void)109a9de470cSBruce Richardson clear_packet_count(void)
110a9de470cSBruce Richardson {
111a9de470cSBruce Richardson memset(&worker_stats, 0, sizeof(worker_stats));
112a9de470cSBruce Richardson }
113a9de470cSBruce Richardson
114a9de470cSBruce Richardson /*
115a9de470cSBruce Richardson * This is the basic worker function for performance tests.
116a9de470cSBruce Richardson * it does nothing but return packets and count them.
117a9de470cSBruce Richardson */
118a9de470cSBruce Richardson static int
handle_work(void * arg)119a9de470cSBruce Richardson handle_work(void *arg)
120a9de470cSBruce Richardson {
121a9de470cSBruce Richardson struct rte_distributor *d = arg;
122a9de470cSBruce Richardson unsigned int num = 0;
123a9de470cSBruce Richardson int i;
124*b6a7e685STyler Retzlaff unsigned int id = rte_atomic_fetch_add_explicit(&worker_idx, 1, rte_memory_order_relaxed);
1250efea35aSTyler Retzlaff alignas(RTE_CACHE_LINE_SIZE) struct rte_mbuf *buf[8];
126a9de470cSBruce Richardson
127a9de470cSBruce Richardson for (i = 0; i < 8; i++)
128a9de470cSBruce Richardson buf[i] = NULL;
129a9de470cSBruce Richardson
130a9de470cSBruce Richardson num = rte_distributor_get_pkt(d, id, buf, buf, num);
131a9de470cSBruce Richardson while (!quit) {
132a9de470cSBruce Richardson worker_stats[id].handled_packets += num;
133a9de470cSBruce Richardson num = rte_distributor_get_pkt(d, id, buf, buf, num);
134a9de470cSBruce Richardson }
135a9de470cSBruce Richardson worker_stats[id].handled_packets += num;
136a9de470cSBruce Richardson rte_distributor_return_pkt(d, id, buf, num);
137a9de470cSBruce Richardson return 0;
138a9de470cSBruce Richardson }
139a9de470cSBruce Richardson
140a9de470cSBruce Richardson /*
141a9de470cSBruce Richardson * This basic performance test just repeatedly sends in 32 packets at a time
142a9de470cSBruce Richardson * to the distributor and verifies at the end that we got them all in the worker
143a9de470cSBruce Richardson * threads and finally how long per packet the processing took.
144a9de470cSBruce Richardson */
145a9de470cSBruce Richardson static inline int
perf_test(struct rte_distributor * d,struct rte_mempool * p)146a9de470cSBruce Richardson perf_test(struct rte_distributor *d, struct rte_mempool *p)
147a9de470cSBruce Richardson {
148a9de470cSBruce Richardson unsigned int i;
149a9de470cSBruce Richardson uint64_t start, end;
150a9de470cSBruce Richardson struct rte_mbuf *bufs[BURST];
151a9de470cSBruce Richardson
152a9de470cSBruce Richardson clear_packet_count();
153a9de470cSBruce Richardson if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) {
154a9de470cSBruce Richardson printf("Error getting mbufs from pool\n");
155a9de470cSBruce Richardson return -1;
156a9de470cSBruce Richardson }
157a9de470cSBruce Richardson /* ensure we have different hash value for each pkt */
158a9de470cSBruce Richardson for (i = 0; i < BURST; i++)
159a9de470cSBruce Richardson bufs[i]->hash.usr = i;
160a9de470cSBruce Richardson
161a9de470cSBruce Richardson start = rte_rdtsc();
162a9de470cSBruce Richardson for (i = 0; i < (1<<ITER_POWER); i++)
163a9de470cSBruce Richardson rte_distributor_process(d, bufs, BURST);
164a9de470cSBruce Richardson end = rte_rdtsc();
165a9de470cSBruce Richardson
166a9de470cSBruce Richardson do {
167a9de470cSBruce Richardson usleep(100);
168a9de470cSBruce Richardson rte_distributor_process(d, NULL, 0);
169a9de470cSBruce Richardson } while (total_packet_count() < (BURST << ITER_POWER));
170a9de470cSBruce Richardson
171a9de470cSBruce Richardson rte_distributor_clear_returns(d);
172a9de470cSBruce Richardson
173a9de470cSBruce Richardson printf("Time per burst: %"PRIu64"\n", (end - start) >> ITER_POWER);
174a9de470cSBruce Richardson printf("Time per packet: %"PRIu64"\n\n",
175a9de470cSBruce Richardson ((end - start) >> ITER_POWER)/BURST);
176a9de470cSBruce Richardson rte_mempool_put_bulk(p, (void *)bufs, BURST);
177a9de470cSBruce Richardson
178a9de470cSBruce Richardson for (i = 0; i < rte_lcore_count() - 1; i++)
179a9de470cSBruce Richardson printf("Worker %u handled %u packets\n", i,
180a9de470cSBruce Richardson worker_stats[i].handled_packets);
181a9de470cSBruce Richardson printf("Total packets: %u (%x)\n", total_packet_count(),
182a9de470cSBruce Richardson total_packet_count());
183a9de470cSBruce Richardson printf("=== Perf test done ===\n\n");
184a9de470cSBruce Richardson
185a9de470cSBruce Richardson return 0;
186a9de470cSBruce Richardson }
187a9de470cSBruce Richardson
188a9de470cSBruce Richardson /* Useful function which ensures that all worker functions terminate */
189a9de470cSBruce Richardson static void
quit_workers(struct rte_distributor * d,struct rte_mempool * p)190a9de470cSBruce Richardson quit_workers(struct rte_distributor *d, struct rte_mempool *p)
191a9de470cSBruce Richardson {
192a9de470cSBruce Richardson const unsigned int num_workers = rte_lcore_count() - 1;
193a9de470cSBruce Richardson unsigned int i;
194a9de470cSBruce Richardson struct rte_mbuf *bufs[RTE_MAX_LCORE];
195a9de470cSBruce Richardson
196a9de470cSBruce Richardson rte_mempool_get_bulk(p, (void *)bufs, num_workers);
197a9de470cSBruce Richardson
198a9de470cSBruce Richardson quit = 1;
19900dac9a9SStanislaw Kardach for (i = 0; i < num_workers; i++) {
200a9de470cSBruce Richardson bufs[i]->hash.usr = i << 1;
20100dac9a9SStanislaw Kardach rte_distributor_process(d, &bufs[i], 1);
20200dac9a9SStanislaw Kardach }
203a9de470cSBruce Richardson
204a9de470cSBruce Richardson rte_mempool_put_bulk(p, (void *)bufs, num_workers);
205a9de470cSBruce Richardson
206a9de470cSBruce Richardson rte_distributor_process(d, NULL, 0);
2076cda39afSStanislaw Kardach rte_distributor_flush(d);
208a9de470cSBruce Richardson rte_eal_mp_wait_lcore();
209a9de470cSBruce Richardson quit = 0;
210a9de470cSBruce Richardson worker_idx = 0;
211a9de470cSBruce Richardson }
212a9de470cSBruce Richardson
213a9de470cSBruce Richardson static int
test_distributor_perf(void)214a9de470cSBruce Richardson test_distributor_perf(void)
215a9de470cSBruce Richardson {
216a9de470cSBruce Richardson static struct rte_distributor *ds;
217a9de470cSBruce Richardson static struct rte_distributor *db;
218a9de470cSBruce Richardson static struct rte_mempool *p;
219a9de470cSBruce Richardson
220a9de470cSBruce Richardson if (rte_lcore_count() < 2) {
221e0f4a0edSDavid Marchand printf("Not enough cores for distributor_perf_autotest, expecting at least 2\n");
222e0f4a0edSDavid Marchand return TEST_SKIPPED;
223a9de470cSBruce Richardson }
224a9de470cSBruce Richardson
225a9de470cSBruce Richardson /* first time how long it takes to round-trip a cache line */
226a9de470cSBruce Richardson time_cache_line_switch();
227a9de470cSBruce Richardson
228a9de470cSBruce Richardson if (ds == NULL) {
229a9de470cSBruce Richardson ds = rte_distributor_create("Test_perf", rte_socket_id(),
230a9de470cSBruce Richardson rte_lcore_count() - 1,
231a9de470cSBruce Richardson RTE_DIST_ALG_SINGLE);
232a9de470cSBruce Richardson if (ds == NULL) {
233a9de470cSBruce Richardson printf("Error creating distributor\n");
234a9de470cSBruce Richardson return -1;
235a9de470cSBruce Richardson }
236a9de470cSBruce Richardson } else {
237a9de470cSBruce Richardson rte_distributor_clear_returns(ds);
238a9de470cSBruce Richardson }
239a9de470cSBruce Richardson
240a9de470cSBruce Richardson if (db == NULL) {
241a9de470cSBruce Richardson db = rte_distributor_create("Test_burst", rte_socket_id(),
242a9de470cSBruce Richardson rte_lcore_count() - 1,
243a9de470cSBruce Richardson RTE_DIST_ALG_BURST);
244a9de470cSBruce Richardson if (db == NULL) {
245a9de470cSBruce Richardson printf("Error creating burst distributor\n");
246a9de470cSBruce Richardson return -1;
247a9de470cSBruce Richardson }
248a9de470cSBruce Richardson } else {
249a9de470cSBruce Richardson rte_distributor_clear_returns(db);
250a9de470cSBruce Richardson }
251a9de470cSBruce Richardson
252a9de470cSBruce Richardson const unsigned nb_bufs = (511 * rte_lcore_count()) < BIG_BATCH ?
253a9de470cSBruce Richardson (BIG_BATCH * 2) - 1 : (511 * rte_lcore_count());
254a9de470cSBruce Richardson if (p == NULL) {
255a9de470cSBruce Richardson p = rte_pktmbuf_pool_create("DPT_MBUF_POOL", nb_bufs, BURST,
256a9de470cSBruce Richardson 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
257a9de470cSBruce Richardson if (p == NULL) {
258a9de470cSBruce Richardson printf("Error creating mempool\n");
259a9de470cSBruce Richardson return -1;
260a9de470cSBruce Richardson }
261a9de470cSBruce Richardson }
262a9de470cSBruce Richardson
263a9de470cSBruce Richardson printf("=== Performance test of distributor (single mode) ===\n");
264cb056611SStephen Hemminger rte_eal_mp_remote_launch(handle_work, ds, SKIP_MAIN);
265a9de470cSBruce Richardson if (perf_test(ds, p) < 0)
266a9de470cSBruce Richardson return -1;
267a9de470cSBruce Richardson quit_workers(ds, p);
268a9de470cSBruce Richardson
269a9de470cSBruce Richardson printf("=== Performance test of distributor (burst mode) ===\n");
270cb056611SStephen Hemminger rte_eal_mp_remote_launch(handle_work, db, SKIP_MAIN);
271a9de470cSBruce Richardson if (perf_test(db, p) < 0)
272a9de470cSBruce Richardson return -1;
273a9de470cSBruce Richardson quit_workers(db, p);
274a9de470cSBruce Richardson
275a9de470cSBruce Richardson return 0;
276a9de470cSBruce Richardson }
277a9de470cSBruce Richardson
2783c60274cSJie Zhou #endif /* !RTE_EXEC_ENV_WINDOWS */
2793c60274cSJie Zhou
/*
 * Expose test_distributor_perf() as distributor_perf_autotest.
 * NOTE(review): REGISTER_PERF_TEST is defined outside this file; presumably
 * it registers the function with the test framework - confirm there.
 */
REGISTER_PERF_TEST(distributor_perf_autotest, test_distributor_perf);
281