xref: /dpdk/app/test/test_distributor_perf.c (revision b6a7e6852e9ab82ae0e05e2d2a0b83abca17de3b)
1a9de470cSBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
2a9de470cSBruce Richardson  * Copyright(c) 2010-2017 Intel Corporation
3a9de470cSBruce Richardson  */
4a9de470cSBruce Richardson 
5a9de470cSBruce Richardson #include "test.h"
6a9de470cSBruce Richardson 
7a9de470cSBruce Richardson #include <unistd.h>
8a9de470cSBruce Richardson #include <string.h>
9a9de470cSBruce Richardson #include <rte_mempool.h>
10a9de470cSBruce Richardson #include <rte_cycles.h>
11a9de470cSBruce Richardson #include <rte_common.h>
12a9de470cSBruce Richardson #include <rte_mbuf.h>
133c60274cSJie Zhou 
143c60274cSJie Zhou #ifdef RTE_EXEC_ENV_WINDOWS
153c60274cSJie Zhou static int
test_distributor_perf(void)163c60274cSJie Zhou test_distributor_perf(void)
173c60274cSJie Zhou {
183c60274cSJie Zhou 	printf("distributor perf not supported on Windows, skipping test\n");
193c60274cSJie Zhou 	return TEST_SKIPPED;
203c60274cSJie Zhou }
213c60274cSJie Zhou 
223c60274cSJie Zhou #else
233c60274cSJie Zhou 
24a9de470cSBruce Richardson #include <rte_distributor.h>
25a9de470cSBruce Richardson #include <rte_pause.h>
26a9de470cSBruce Richardson 
27a9de470cSBruce Richardson #define ITER_POWER_CL 25 /* log 2 of how many iterations  for Cache Line test */
28a9de470cSBruce Richardson #define ITER_POWER 21 /* log 2 of how many iterations we do when timing. */
29a9de470cSBruce Richardson #define BURST 64
30a9de470cSBruce Richardson #define BIG_BATCH 1024
31a9de470cSBruce Richardson 
32a9de470cSBruce Richardson /* static vars - zero initialized by default */
33a9de470cSBruce Richardson static volatile int quit;
34*b6a7e685STyler Retzlaff static volatile RTE_ATOMIC(unsigned int) worker_idx;
35a9de470cSBruce Richardson 
360efea35aSTyler Retzlaff struct __rte_cache_aligned worker_stats {
37a9de470cSBruce Richardson 	volatile unsigned handled_packets;
380efea35aSTyler Retzlaff };
39d1705276SFerruh Yigit static struct worker_stats worker_stats[RTE_MAX_LCORE];
40a9de470cSBruce Richardson 
/*
 * Worker side of the cache line round-trip measurement.
 *
 * Spins until *arg becomes non-zero, remembers the value it saw and writes
 * zero back. This repeats until the value seen was 2, after which the
 * function returns 0.
 */
static int
flip_bit(volatile uint64_t *arg)
{
	uint64_t seen;

	do {
		/* wait for the other side to post a non-zero value */
		while (*arg == 0)
			rte_pause();
		seen = *arg;
		*arg = 0;
	} while (seen != 2);

	return 0;
}
57a9de470cSBruce Richardson 
58a9de470cSBruce Richardson /*
59a9de470cSBruce Richardson  * test case to time the number of cycles to round-trip a cache line between
60a9de470cSBruce Richardson  * two cores and back again.
61a9de470cSBruce Richardson  */
62a9de470cSBruce Richardson static void
time_cache_line_switch(void)63a9de470cSBruce Richardson time_cache_line_switch(void)
64a9de470cSBruce Richardson {
65a9de470cSBruce Richardson 	/* allocate a full cache line for data, we use only first byte of it */
66a9de470cSBruce Richardson 	uint64_t data[RTE_CACHE_LINE_SIZE*3 / sizeof(uint64_t)];
67a9de470cSBruce Richardson 
68cb056611SStephen Hemminger 	unsigned int i, workerid = rte_get_next_lcore(rte_lcore_id(), 0, 0);
69a9de470cSBruce Richardson 	volatile uint64_t *pdata = &data[0];
70a9de470cSBruce Richardson 	*pdata = 1;
71cb056611SStephen Hemminger 	rte_eal_remote_launch((lcore_function_t *)flip_bit, &data[0], workerid);
72a9de470cSBruce Richardson 	while (*pdata)
73a9de470cSBruce Richardson 		rte_pause();
74a9de470cSBruce Richardson 
75a9de470cSBruce Richardson 	const uint64_t start_time = rte_rdtsc();
76a9de470cSBruce Richardson 	for (i = 0; i < (1 << ITER_POWER_CL); i++) {
77a9de470cSBruce Richardson 		while (*pdata)
78a9de470cSBruce Richardson 			rte_pause();
79a9de470cSBruce Richardson 		*pdata = 1;
80a9de470cSBruce Richardson 	}
81a9de470cSBruce Richardson 	const uint64_t end_time = rte_rdtsc();
82a9de470cSBruce Richardson 
83a9de470cSBruce Richardson 	while (*pdata)
84a9de470cSBruce Richardson 		rte_pause();
85a9de470cSBruce Richardson 	*pdata = 2;
86cb056611SStephen Hemminger 	rte_eal_wait_lcore(workerid);
87a9de470cSBruce Richardson 	printf("==== Cache line switch test ===\n");
88a9de470cSBruce Richardson 	printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER_CL),
89a9de470cSBruce Richardson 			end_time-start_time);
90a9de470cSBruce Richardson 	printf("Ticks per iteration = %"PRIu64"\n\n",
91a9de470cSBruce Richardson 			(end_time-start_time) >> ITER_POWER_CL);
92a9de470cSBruce Richardson }
93a9de470cSBruce Richardson 
94a9de470cSBruce Richardson /*
95a9de470cSBruce Richardson  * returns the total count of the number of packets handled by the worker
96a9de470cSBruce Richardson  * functions given below.
97a9de470cSBruce Richardson  */
98a9de470cSBruce Richardson static unsigned
total_packet_count(void)99a9de470cSBruce Richardson total_packet_count(void)
100a9de470cSBruce Richardson {
101a9de470cSBruce Richardson 	unsigned i, count = 0;
102a9de470cSBruce Richardson 	for (i = 0; i < worker_idx; i++)
103a9de470cSBruce Richardson 		count += worker_stats[i].handled_packets;
104a9de470cSBruce Richardson 	return count;
105a9de470cSBruce Richardson }
106a9de470cSBruce Richardson 
107a9de470cSBruce Richardson /* resets the packet counts for a new test */
108a9de470cSBruce Richardson static void
clear_packet_count(void)109a9de470cSBruce Richardson clear_packet_count(void)
110a9de470cSBruce Richardson {
111a9de470cSBruce Richardson 	memset(&worker_stats, 0, sizeof(worker_stats));
112a9de470cSBruce Richardson }
113a9de470cSBruce Richardson 
114a9de470cSBruce Richardson /*
115a9de470cSBruce Richardson  * This is the basic worker function for performance tests.
116a9de470cSBruce Richardson  * it does nothing but return packets and count them.
117a9de470cSBruce Richardson  */
118a9de470cSBruce Richardson static int
handle_work(void * arg)119a9de470cSBruce Richardson handle_work(void *arg)
120a9de470cSBruce Richardson {
121a9de470cSBruce Richardson 	struct rte_distributor *d = arg;
122a9de470cSBruce Richardson 	unsigned int num = 0;
123a9de470cSBruce Richardson 	int i;
124*b6a7e685STyler Retzlaff 	unsigned int id = rte_atomic_fetch_add_explicit(&worker_idx, 1, rte_memory_order_relaxed);
1250efea35aSTyler Retzlaff 	alignas(RTE_CACHE_LINE_SIZE) struct rte_mbuf *buf[8];
126a9de470cSBruce Richardson 
127a9de470cSBruce Richardson 	for (i = 0; i < 8; i++)
128a9de470cSBruce Richardson 		buf[i] = NULL;
129a9de470cSBruce Richardson 
130a9de470cSBruce Richardson 	num = rte_distributor_get_pkt(d, id, buf, buf, num);
131a9de470cSBruce Richardson 	while (!quit) {
132a9de470cSBruce Richardson 		worker_stats[id].handled_packets += num;
133a9de470cSBruce Richardson 		num = rte_distributor_get_pkt(d, id, buf, buf, num);
134a9de470cSBruce Richardson 	}
135a9de470cSBruce Richardson 	worker_stats[id].handled_packets += num;
136a9de470cSBruce Richardson 	rte_distributor_return_pkt(d, id, buf, num);
137a9de470cSBruce Richardson 	return 0;
138a9de470cSBruce Richardson }
139a9de470cSBruce Richardson 
140a9de470cSBruce Richardson /*
141a9de470cSBruce Richardson  * This basic performance test just repeatedly sends in 32 packets at a time
142a9de470cSBruce Richardson  * to the distributor and verifies at the end that we got them all in the worker
143a9de470cSBruce Richardson  * threads and finally how long per packet the processing took.
144a9de470cSBruce Richardson  */
145a9de470cSBruce Richardson static inline int
perf_test(struct rte_distributor * d,struct rte_mempool * p)146a9de470cSBruce Richardson perf_test(struct rte_distributor *d, struct rte_mempool *p)
147a9de470cSBruce Richardson {
148a9de470cSBruce Richardson 	unsigned int i;
149a9de470cSBruce Richardson 	uint64_t start, end;
150a9de470cSBruce Richardson 	struct rte_mbuf *bufs[BURST];
151a9de470cSBruce Richardson 
152a9de470cSBruce Richardson 	clear_packet_count();
153a9de470cSBruce Richardson 	if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) {
154a9de470cSBruce Richardson 		printf("Error getting mbufs from pool\n");
155a9de470cSBruce Richardson 		return -1;
156a9de470cSBruce Richardson 	}
157a9de470cSBruce Richardson 	/* ensure we have different hash value for each pkt */
158a9de470cSBruce Richardson 	for (i = 0; i < BURST; i++)
159a9de470cSBruce Richardson 		bufs[i]->hash.usr = i;
160a9de470cSBruce Richardson 
161a9de470cSBruce Richardson 	start = rte_rdtsc();
162a9de470cSBruce Richardson 	for (i = 0; i < (1<<ITER_POWER); i++)
163a9de470cSBruce Richardson 		rte_distributor_process(d, bufs, BURST);
164a9de470cSBruce Richardson 	end = rte_rdtsc();
165a9de470cSBruce Richardson 
166a9de470cSBruce Richardson 	do {
167a9de470cSBruce Richardson 		usleep(100);
168a9de470cSBruce Richardson 		rte_distributor_process(d, NULL, 0);
169a9de470cSBruce Richardson 	} while (total_packet_count() < (BURST << ITER_POWER));
170a9de470cSBruce Richardson 
171a9de470cSBruce Richardson 	rte_distributor_clear_returns(d);
172a9de470cSBruce Richardson 
173a9de470cSBruce Richardson 	printf("Time per burst:  %"PRIu64"\n", (end - start) >> ITER_POWER);
174a9de470cSBruce Richardson 	printf("Time per packet: %"PRIu64"\n\n",
175a9de470cSBruce Richardson 			((end - start) >> ITER_POWER)/BURST);
176a9de470cSBruce Richardson 	rte_mempool_put_bulk(p, (void *)bufs, BURST);
177a9de470cSBruce Richardson 
178a9de470cSBruce Richardson 	for (i = 0; i < rte_lcore_count() - 1; i++)
179a9de470cSBruce Richardson 		printf("Worker %u handled %u packets\n", i,
180a9de470cSBruce Richardson 				worker_stats[i].handled_packets);
181a9de470cSBruce Richardson 	printf("Total packets: %u (%x)\n", total_packet_count(),
182a9de470cSBruce Richardson 			total_packet_count());
183a9de470cSBruce Richardson 	printf("=== Perf test done ===\n\n");
184a9de470cSBruce Richardson 
185a9de470cSBruce Richardson 	return 0;
186a9de470cSBruce Richardson }
187a9de470cSBruce Richardson 
188a9de470cSBruce Richardson /* Useful function which ensures that all worker functions terminate */
189a9de470cSBruce Richardson static void
quit_workers(struct rte_distributor * d,struct rte_mempool * p)190a9de470cSBruce Richardson quit_workers(struct rte_distributor *d, struct rte_mempool *p)
191a9de470cSBruce Richardson {
192a9de470cSBruce Richardson 	const unsigned int num_workers = rte_lcore_count() - 1;
193a9de470cSBruce Richardson 	unsigned int i;
194a9de470cSBruce Richardson 	struct rte_mbuf *bufs[RTE_MAX_LCORE];
195a9de470cSBruce Richardson 
196a9de470cSBruce Richardson 	rte_mempool_get_bulk(p, (void *)bufs, num_workers);
197a9de470cSBruce Richardson 
198a9de470cSBruce Richardson 	quit = 1;
19900dac9a9SStanislaw Kardach 	for (i = 0; i < num_workers; i++) {
200a9de470cSBruce Richardson 		bufs[i]->hash.usr = i << 1;
20100dac9a9SStanislaw Kardach 		rte_distributor_process(d, &bufs[i], 1);
20200dac9a9SStanislaw Kardach 	}
203a9de470cSBruce Richardson 
204a9de470cSBruce Richardson 	rte_mempool_put_bulk(p, (void *)bufs, num_workers);
205a9de470cSBruce Richardson 
206a9de470cSBruce Richardson 	rte_distributor_process(d, NULL, 0);
2076cda39afSStanislaw Kardach 	rte_distributor_flush(d);
208a9de470cSBruce Richardson 	rte_eal_mp_wait_lcore();
209a9de470cSBruce Richardson 	quit = 0;
210a9de470cSBruce Richardson 	worker_idx = 0;
211a9de470cSBruce Richardson }
212a9de470cSBruce Richardson 
213a9de470cSBruce Richardson static int
test_distributor_perf(void)214a9de470cSBruce Richardson test_distributor_perf(void)
215a9de470cSBruce Richardson {
216a9de470cSBruce Richardson 	static struct rte_distributor *ds;
217a9de470cSBruce Richardson 	static struct rte_distributor *db;
218a9de470cSBruce Richardson 	static struct rte_mempool *p;
219a9de470cSBruce Richardson 
220a9de470cSBruce Richardson 	if (rte_lcore_count() < 2) {
221e0f4a0edSDavid Marchand 		printf("Not enough cores for distributor_perf_autotest, expecting at least 2\n");
222e0f4a0edSDavid Marchand 		return TEST_SKIPPED;
223a9de470cSBruce Richardson 	}
224a9de470cSBruce Richardson 
225a9de470cSBruce Richardson 	/* first time how long it takes to round-trip a cache line */
226a9de470cSBruce Richardson 	time_cache_line_switch();
227a9de470cSBruce Richardson 
228a9de470cSBruce Richardson 	if (ds == NULL) {
229a9de470cSBruce Richardson 		ds = rte_distributor_create("Test_perf", rte_socket_id(),
230a9de470cSBruce Richardson 				rte_lcore_count() - 1,
231a9de470cSBruce Richardson 				RTE_DIST_ALG_SINGLE);
232a9de470cSBruce Richardson 		if (ds == NULL) {
233a9de470cSBruce Richardson 			printf("Error creating distributor\n");
234a9de470cSBruce Richardson 			return -1;
235a9de470cSBruce Richardson 		}
236a9de470cSBruce Richardson 	} else {
237a9de470cSBruce Richardson 		rte_distributor_clear_returns(ds);
238a9de470cSBruce Richardson 	}
239a9de470cSBruce Richardson 
240a9de470cSBruce Richardson 	if (db == NULL) {
241a9de470cSBruce Richardson 		db = rte_distributor_create("Test_burst", rte_socket_id(),
242a9de470cSBruce Richardson 				rte_lcore_count() - 1,
243a9de470cSBruce Richardson 				RTE_DIST_ALG_BURST);
244a9de470cSBruce Richardson 		if (db == NULL) {
245a9de470cSBruce Richardson 			printf("Error creating burst distributor\n");
246a9de470cSBruce Richardson 			return -1;
247a9de470cSBruce Richardson 		}
248a9de470cSBruce Richardson 	} else {
249a9de470cSBruce Richardson 		rte_distributor_clear_returns(db);
250a9de470cSBruce Richardson 	}
251a9de470cSBruce Richardson 
252a9de470cSBruce Richardson 	const unsigned nb_bufs = (511 * rte_lcore_count()) < BIG_BATCH ?
253a9de470cSBruce Richardson 			(BIG_BATCH * 2) - 1 : (511 * rte_lcore_count());
254a9de470cSBruce Richardson 	if (p == NULL) {
255a9de470cSBruce Richardson 		p = rte_pktmbuf_pool_create("DPT_MBUF_POOL", nb_bufs, BURST,
256a9de470cSBruce Richardson 			0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
257a9de470cSBruce Richardson 		if (p == NULL) {
258a9de470cSBruce Richardson 			printf("Error creating mempool\n");
259a9de470cSBruce Richardson 			return -1;
260a9de470cSBruce Richardson 		}
261a9de470cSBruce Richardson 	}
262a9de470cSBruce Richardson 
263a9de470cSBruce Richardson 	printf("=== Performance test of distributor (single mode) ===\n");
264cb056611SStephen Hemminger 	rte_eal_mp_remote_launch(handle_work, ds, SKIP_MAIN);
265a9de470cSBruce Richardson 	if (perf_test(ds, p) < 0)
266a9de470cSBruce Richardson 		return -1;
267a9de470cSBruce Richardson 	quit_workers(ds, p);
268a9de470cSBruce Richardson 
269a9de470cSBruce Richardson 	printf("=== Performance test of distributor (burst mode) ===\n");
270cb056611SStephen Hemminger 	rte_eal_mp_remote_launch(handle_work, db, SKIP_MAIN);
271a9de470cSBruce Richardson 	if (perf_test(db, p) < 0)
272a9de470cSBruce Richardson 		return -1;
273a9de470cSBruce Richardson 	quit_workers(db, p);
274a9de470cSBruce Richardson 
275a9de470cSBruce Richardson 	return 0;
276a9de470cSBruce Richardson }
277a9de470cSBruce Richardson 
2783c60274cSJie Zhou #endif /* !RTE_EXEC_ENV_WINDOWS */
2793c60274cSJie Zhou 
280e0a8442cSBruce Richardson REGISTER_PERF_TEST(distributor_perf_autotest, test_distributor_perf);
281