/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdio.h>
#include <inttypes.h>

#include <rte_cycles.h>
#include <rte_launch.h>
#include <rte_pause.h>
#include <rte_stack.h>

#include "test.h"

#define STACK_NAME "STACK_PERF"
#define MAX_BURST 32
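/* Sized so that every lcore can hold a full burst on the stack at once. */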
#define STACK_SIZE (RTE_MAX_LCORE * MAX_BURST)

/*
 * Push/pop bulk sizes, marked volatile so they aren't treated as compile-time
 * constants.
 */
static volatile unsigned int bulk_sizes[] = {8, MAX_BURST};

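/*
 * Countdown barrier: each participating lcore decrements it on entry, then
 * spins until it reaches zero, so all timed loops start together.
 */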
static RTE_ATOMIC(uint32_t) lcore_barrier;

struct lcore_pair {
	unsigned int c1;
	unsigned int c2;
};

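/* Find two lcores that are hyperthread siblings of the same physical core. */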
static int
get_two_hyperthreads(struct lcore_pair *lcp)
{
	unsigned int socket[2];
	unsigned int core[2];
	unsigned int id[2];

	RTE_LCORE_FOREACH(id[0]) {
		RTE_LCORE_FOREACH(id[1]) {
			if (id[0] == id[1])
				continue;
			core[0] = rte_lcore_to_cpu_id(id[0]);
			core[1] = rte_lcore_to_cpu_id(id[1]);
			socket[0] = rte_lcore_to_socket_id(id[0]);
			socket[1] = rte_lcore_to_socket_id(id[1]);
			if ((core[0] == core[1]) && (socket[0] == socket[1])) {
				lcp->c1 = id[0];
				lcp->c2 = id[1];
				return 0;
			}
		}
	}

	return 1;
}

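/* Find two lcores on distinct physical cores within the same socket. */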
static int
get_two_cores(struct lcore_pair *lcp)
{
	unsigned int socket[2];
	unsigned int core[2];
	unsigned int id[2];

	RTE_LCORE_FOREACH(id[0]) {
		RTE_LCORE_FOREACH(id[1]) {
			if (id[0] == id[1])
				continue;
			core[0] = rte_lcore_to_cpu_id(id[0]);
			core[1] = rte_lcore_to_cpu_id(id[1]);
			socket[0] = rte_lcore_to_socket_id(id[0]);
			socket[1] = rte_lcore_to_socket_id(id[1]);
			if ((core[0] != core[1]) && (socket[0] == socket[1])) {
				lcp->c1 = id[0];
				lcp->c2 = id[1];
				return 0;
			}
		}
	}

	return 1;
}

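/* Find two lcores on different sockets (NUMA nodes). */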
static int
get_two_sockets(struct lcore_pair *lcp)
{
	unsigned int socket[2];
	unsigned int id[2];

	RTE_LCORE_FOREACH(id[0]) {
		RTE_LCORE_FOREACH(id[1]) {
			if (id[0] == id[1])
				continue;
			socket[0] = rte_lcore_to_socket_id(id[0]);
			socket[1] = rte_lcore_to_socket_id(id[1]);
			if (socket[0] != socket[1]) {
				lcp->c1 = id[0];
				lcp->c2 = id[1];
				return 0;
			}
		}
	}

	return 1;
}

/* Measure the cycle cost of popping an empty stack. */
static void
test_empty_pop(struct rte_stack *s)
{
	unsigned int iterations = 100000000;
	void *objs[MAX_BURST];
	unsigned int i;

	uint64_t start = rte_rdtsc();

	for (i = 0; i < iterations; i++)
		rte_stack_pop(s, objs, bulk_sizes[0]);

	uint64_t end = rte_rdtsc();

	printf("Stack empty pop: %.2F\n",
	       (double)(end - start) / iterations);
}

struct thread_args {
	struct rte_stack *s;
	unsigned int sz;
	double avg;
};

/* Measure the average per-pointer cycle cost of stack push and pop. */
static int
bulk_push_pop(void *p)
{
	unsigned int iterations = 1000000;
	struct thread_args *args = p;
	void *objs[MAX_BURST] = {0};
	unsigned int size, i;
	struct rte_stack *s;

	s = args->s;
	size = args->sz;

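	/* Signal arrival, then wait until every participant has arrived. */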
	rte_atomic_fetch_sub_explicit(&lcore_barrier, 1, rte_memory_order_relaxed);
	rte_wait_until_equal_32((uint32_t *)(uintptr_t)&lcore_barrier, 0, rte_memory_order_relaxed);

	uint64_t start = rte_rdtsc();

	for (i = 0; i < iterations; i++) {
		rte_stack_push(s, objs, size);
		rte_stack_pop(s, objs, size);
	}

	uint64_t end = rte_rdtsc();

	args->avg = ((double)(end - start)) / (iterations * size);

	return 0;
}

/*
 * Run bulk_push_pop() simultaneously on pairs of cores, to measure stack
 * performance between hyperthread siblings, between cores on the same socket,
 * and between cores on different sockets.
 */
static void
run_on_core_pair(struct lcore_pair *cores, struct rte_stack *s,
		lcore_function_t fn)
{
	struct thread_args args[2];
	unsigned int i;

	for (i = 0; i < RTE_DIM(bulk_sizes); i++) {
		rte_atomic_store_explicit(&lcore_barrier, 2, rte_memory_order_relaxed);

		args[0].sz = args[1].sz = bulk_sizes[i];
		args[0].s = args[1].s = s;

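		/* The main lcore can't be remote-launched, so run fn() on it directly. */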
		if (cores->c1 == rte_get_main_lcore()) {
			rte_eal_remote_launch(fn, &args[1], cores->c2);
			fn(&args[0]);
			rte_eal_wait_lcore(cores->c2);
		} else {
			rte_eal_remote_launch(fn, &args[0], cores->c1);
			rte_eal_remote_launch(fn, &args[1], cores->c2);
			rte_eal_wait_lcore(cores->c1);
			rte_eal_wait_lcore(cores->c2);
		}

		printf("Average cycles per object push/pop (bulk size: %u): %.2F\n",
		       bulk_sizes[i], (args[0].avg + args[1].avg) / 2);
	}
}

/* Run bulk_push_pop() simultaneously on 1+ cores. */
static void
run_on_n_cores(struct rte_stack *s, lcore_function_t fn, int n)
{
	struct thread_args args[RTE_MAX_LCORE];
	unsigned int i;

	for (i = 0; i < RTE_DIM(bulk_sizes); i++) {
		unsigned int lcore_id;
		int cnt = 0;
		double avg;

		rte_atomic_store_explicit(&lcore_barrier, n, rte_memory_order_relaxed);

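		/* Launch fn() on up to n - 1 workers; this lcore is the n-th. */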
		RTE_LCORE_FOREACH_WORKER(lcore_id) {
			if (++cnt >= n)
				break;

			args[lcore_id].s = s;
			args[lcore_id].sz = bulk_sizes[i];

			if (rte_eal_remote_launch(fn, &args[lcore_id],
						  lcore_id))
				rte_panic("Failed to launch lcore %d\n",
					  lcore_id);
		}

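		/* The calling lcore participates in the measurement as well. */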
		lcore_id = rte_lcore_id();

		args[lcore_id].s = s;
		args[lcore_id].sz = bulk_sizes[i];

		fn(&args[lcore_id]);

		rte_eal_mp_wait_lcore();

		avg = args[rte_lcore_id()].avg;

		cnt = 0;
		RTE_LCORE_FOREACH_WORKER(lcore_id) {
			if (++cnt >= n)
				break;
			avg += args[lcore_id].avg;
		}

		printf("Average cycles per object push/pop (bulk size: %u): %.2F\n",
		       bulk_sizes[i], avg / n);
	}
}

/*
 * Measure the cycle cost of pushing and popping a single pointer on a single
 * lcore.
 */
static void
test_single_push_pop(struct rte_stack *s)
{
	unsigned int iterations = 16000000;
	void *obj = NULL;
	unsigned int i;

	uint64_t start = rte_rdtsc();

	for (i = 0; i < iterations; i++) {
		rte_stack_push(s, &obj, 1);
		rte_stack_pop(s, &obj, 1);
	}

	uint64_t end = rte_rdtsc();

	printf("Average cycles per single object push/pop: %.2F\n",
	       ((double)(end - start)) / iterations);
}

/* Measure the cycle cost of bulk pushing and popping on a single lcore. */
static void
test_bulk_push_pop(struct rte_stack *s)
{
	unsigned int iterations = 8000000;
	void *objs[MAX_BURST];
	unsigned int sz, i;

	for (sz = 0; sz < RTE_DIM(bulk_sizes); sz++) {
		uint64_t start = rte_rdtsc();

		for (i = 0; i < iterations; i++) {
			rte_stack_push(s, objs, bulk_sizes[sz]);
			rte_stack_pop(s, objs, bulk_sizes[sz]);
		}

		uint64_t end = rte_rdtsc();

		double avg = ((double)(end - start) /
			      (iterations * bulk_sizes[sz]));

		printf("Average cycles per object push/pop (bulk size: %u): %.2F\n",
		       bulk_sizes[sz], avg);
	}
}

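/* Create a stack with the given flags and run the full set of perf tests. */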
static int
__test_stack_perf(uint32_t flags)
{
	struct lcore_pair cores;
	struct rte_stack *s;

	rte_atomic_store_explicit(&lcore_barrier, 0, rte_memory_order_relaxed);

	s = rte_stack_create(STACK_NAME, STACK_SIZE, rte_socket_id(), flags);
	if (s == NULL) {
		printf("[%s():%u] failed to create a stack\n",
		       __func__, __LINE__);
		return -1;
	}

	printf("### Testing single element push/pop ###\n");
	test_single_push_pop(s);

	printf("\n### Testing empty pop ###\n");
	test_empty_pop(s);

	printf("\n### Testing using a single lcore ###\n");
	test_bulk_push_pop(s);

	if (get_two_hyperthreads(&cores) == 0) {
		printf("\n### Testing using two hyperthreads ###\n");
		run_on_core_pair(&cores, s, bulk_push_pop);
	}
	if (get_two_cores(&cores) == 0) {
		printf("\n### Testing using two physical cores ###\n");
		run_on_core_pair(&cores, s, bulk_push_pop);
	}
	if (get_two_sockets(&cores) == 0) {
		printf("\n### Testing using two NUMA nodes ###\n");
		run_on_core_pair(&cores, s, bulk_push_pop);
	}

	printf("\n### Testing on all %u lcores ###\n", rte_lcore_count());
	run_on_n_cores(s, bulk_push_pop, rte_lcore_count());

	rte_stack_free(s);
	return 0;
}

static int
test_stack_perf(void)
{
	return __test_stack_perf(0);
}

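/* The lock-free stack needs platform support; skip where it's unavailable. */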
static int
test_lf_stack_perf(void)
{
#if defined(RTE_STACK_LF_SUPPORTED)
	return __test_stack_perf(RTE_STACK_F_LF);
#else
	return TEST_SKIPPED;
#endif
}

REGISTER_PERF_TEST(stack_perf_autotest, test_stack_perf);
REGISTER_PERF_TEST(stack_lf_perf_autotest, test_lf_stack_perf);