1a9de470cSBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
2a9de470cSBruce Richardson * Copyright(c) 2010-2018 Intel Corporation
3a9de470cSBruce Richardson */
4a9de470cSBruce Richardson
5a9de470cSBruce Richardson /*
6a9de470cSBruce Richardson * This is a simple functional test for rte_smp_mb() implementation.
7a9de470cSBruce Richardson * I.E. make sure that LOAD and STORE operations that precede the
8a9de470cSBruce Richardson * rte_smp_mb() call are globally visible across the lcores
 * before the LOAD and STORE operations that follow it.
10a9de470cSBruce Richardson * The test uses simple implementation of Peterson's lock algorithm
11a9de470cSBruce Richardson * (https://en.wikipedia.org/wiki/Peterson%27s_algorithm)
12a9de470cSBruce Richardson * for two execution units to make sure that rte_smp_mb() prevents
13a9de470cSBruce Richardson * store-load reordering to happen.
 * Also when executed on a single lcore could be used as an approximate
15a9de470cSBruce Richardson * estimation of number of cycles particular implementation of rte_smp_mb()
16a9de470cSBruce Richardson * will take.
17a9de470cSBruce Richardson */
18a9de470cSBruce Richardson
19a9de470cSBruce Richardson #include <stdio.h>
20a9de470cSBruce Richardson #include <string.h>
21a9de470cSBruce Richardson #include <stdint.h>
22a9de470cSBruce Richardson #include <inttypes.h>
23a9de470cSBruce Richardson
24a9de470cSBruce Richardson #include <rte_memory.h>
25a9de470cSBruce Richardson #include <rte_per_lcore.h>
26a9de470cSBruce Richardson #include <rte_launch.h>
27a9de470cSBruce Richardson #include <rte_eal.h>
28a9de470cSBruce Richardson #include <rte_lcore.h>
29a9de470cSBruce Richardson #include <rte_pause.h>
30a9de470cSBruce Richardson #include <rte_random.h>
31a9de470cSBruce Richardson #include <rte_cycles.h>
32a9de470cSBruce Richardson #include <rte_vect.h>
33a9de470cSBruce Richardson #include <rte_debug.h>
34a9de470cSBruce Richardson
35a9de470cSBruce Richardson #include "test.h"
36a9de470cSBruce Richardson
/* exclusive upper bound for the per-iteration increment value */
#define ADD_MAX 8
/* number of lock/update iterations each lcore performs */
#define ITER_MAX 0x1000000

/* selects which barrier implementation store_load_barrier() issues */
enum plock_use_type {
	USE_MB,		/* full memory barrier: rte_mb() */
	USE_SMP_MB,	/* SMP barrier under test: rte_smp_mb() */
	USE_NUM		/* number of entries, not a real barrier type */
};
45a9de470cSBruce Richardson
/* Peterson-lock state for two contenders (self index 0 or 1). */
struct plock {
	volatile uint32_t flag[2];	/* flag[i] != 0: contender i wants the lock */
	volatile uint32_t victim;	/* contender that yields on simultaneous contention */
	enum plock_use_type utype;	/* barrier flavour used inside plock_lock() */
};
51a9de470cSBruce Richardson
/*
 * Lock plus protected by it two counters.
 */
struct plock_test {
	struct plock lock;
	uint64_t val;	/* sum of all values added under the lock */
	uint64_t iter;	/* number of updates performed under the lock */
};
60a9de470cSBruce Richardson
/*
 * Each active lcore shares plock_test struct with its left and right
 * neighbours.
 */
struct lcore_plock_test {
	struct plock_test *pt[2]; /* shared, lock-protected data */
	uint64_t sum[2]; /* local copy of the shared data */
	uint64_t iter; /* number of iterations to perform */
	uint32_t lc; /* given lcore id */
};
71a9de470cSBruce Richardson
72a9de470cSBruce Richardson static inline void
store_load_barrier(uint32_t utype)73a9de470cSBruce Richardson store_load_barrier(uint32_t utype)
74a9de470cSBruce Richardson {
75a9de470cSBruce Richardson if (utype == USE_MB)
76a9de470cSBruce Richardson rte_mb();
77a9de470cSBruce Richardson else if (utype == USE_SMP_MB)
78a9de470cSBruce Richardson rte_smp_mb();
79a9de470cSBruce Richardson else
80a9de470cSBruce Richardson RTE_VERIFY(0);
81a9de470cSBruce Richardson }
82a9de470cSBruce Richardson
/*
 * Peterson lock implementation.
 * Acquire the lock for contender @self (0 or 1); spins until the peer
 * either releases the lock or yields priority.
 */
static void
plock_lock(struct plock *l, uint32_t self)
{
	uint32_t other;

	other = self ^ 1;

	/* declare interest, then yield priority to the other contender */
	l->flag[self] = 1;
	/* ensure flag[self] is visible before victim is written */
	rte_smp_wmb();
	l->victim = self;

	/*
	 * The barrier under test: the stores above must become globally
	 * visible before the loads in the spin loop below. Without a
	 * store-load barrier here, both contenders could enter the
	 * critical section.
	 */
	store_load_barrier(l->utype);

	/* spin while the peer both wants the lock and has priority */
	while (l->flag[other] == 1 && l->victim == self)
		rte_pause();
	/* acquire semantics: reads of protected data stay after this point */
	rte_smp_rmb();
}
103a9de470cSBruce Richardson
/* Release the lock held by contender @self. */
static void
plock_unlock(struct plock *l, uint32_t self)
{
	/* release semantics: flush protected-data stores before dropping the flag */
	rte_smp_wmb();
	l->flag[self] = 0;
}
110a9de470cSBruce Richardson
111a9de470cSBruce Richardson static void
plock_reset(struct plock * l,enum plock_use_type utype)112a9de470cSBruce Richardson plock_reset(struct plock *l, enum plock_use_type utype)
113a9de470cSBruce Richardson {
114a9de470cSBruce Richardson memset(l, 0, sizeof(*l));
115a9de470cSBruce Richardson l->utype = utype;
116a9de470cSBruce Richardson }
117a9de470cSBruce Richardson
/*
 * grab the lock, update both counters, release the lock.
 */
static void
plock_add(struct plock_test *pt, uint32_t self, uint32_t n)
{
	plock_lock(&pt->lock, self);
	/* under the lock: iter counts the calls, val accumulates the added values */
	pt->iter++;
	pt->val += n;
	plock_unlock(&pt->lock, self);
}
129a9de470cSBruce Richardson
130a9de470cSBruce Richardson static int
plock_test1_lcore(void * data)131a9de470cSBruce Richardson plock_test1_lcore(void *data)
132a9de470cSBruce Richardson {
133a9de470cSBruce Richardson uint64_t tm;
134f8f207c0SDavid Christensen uint32_t lc, ln;
135f8f207c0SDavid Christensen uint64_t i, n;
136a9de470cSBruce Richardson struct lcore_plock_test *lpt;
137a9de470cSBruce Richardson
138a9de470cSBruce Richardson lpt = data;
139a9de470cSBruce Richardson lc = rte_lcore_id();
140a9de470cSBruce Richardson
141a9de470cSBruce Richardson /* find lcore_plock_test struct for given lcore */
142a9de470cSBruce Richardson for (ln = rte_lcore_count(); ln != 0 && lpt->lc != lc; lpt++, ln--)
143a9de470cSBruce Richardson ;
144a9de470cSBruce Richardson
145a9de470cSBruce Richardson if (ln == 0) {
146a9de470cSBruce Richardson printf("%s(%u) error at init\n", __func__, lc);
147a9de470cSBruce Richardson return -1;
148a9de470cSBruce Richardson }
149a9de470cSBruce Richardson
150a9de470cSBruce Richardson n = rte_rand() % ADD_MAX;
151a9de470cSBruce Richardson tm = rte_get_timer_cycles();
152a9de470cSBruce Richardson
153a9de470cSBruce Richardson /*
154a9de470cSBruce Richardson * for each iteration:
155a9de470cSBruce Richardson * - update shared, locked protected data in a safe manner
156a9de470cSBruce Richardson * - update local copy of the shared data
157a9de470cSBruce Richardson */
158a9de470cSBruce Richardson for (i = 0; i != lpt->iter; i++) {
159a9de470cSBruce Richardson
160a9de470cSBruce Richardson plock_add(lpt->pt[0], 0, n);
161a9de470cSBruce Richardson plock_add(lpt->pt[1], 1, n);
162a9de470cSBruce Richardson
163a9de470cSBruce Richardson lpt->sum[0] += n;
164a9de470cSBruce Richardson lpt->sum[1] += n;
165a9de470cSBruce Richardson
166a9de470cSBruce Richardson n = (n + 1) % ADD_MAX;
167a9de470cSBruce Richardson }
168a9de470cSBruce Richardson
169a9de470cSBruce Richardson tm = rte_get_timer_cycles() - tm;
170a9de470cSBruce Richardson
171f8f207c0SDavid Christensen printf("%s(%u): %" PRIu64 " iterations finished, in %" PRIu64
172a9de470cSBruce Richardson " cycles, %#Lf cycles/iteration, "
173f8f207c0SDavid Christensen "local sum={%" PRIu64 ", %" PRIu64 "}\n",
174a9de470cSBruce Richardson __func__, lc, i, tm, (long double)tm / i,
175a9de470cSBruce Richardson lpt->sum[0], lpt->sum[1]);
176a9de470cSBruce Richardson return 0;
177a9de470cSBruce Richardson }
178a9de470cSBruce Richardson
/*
 * For N active lcores we allocate N+1 lcore_plock_test structures.
 * Each active lcore shares one lcore_plock_test structure with its
 * left lcore neighbor and one lcore_plock_test structure with its
 * right lcore neighbor.
 * During the test each lcore updates data in both shared structures and
 * its local copies. Then at validation phase we check that our shared
 * and local data are the same.
 * Returns 0 on success, -1 on data mismatch, -ENOMEM on allocation failure.
 */
static int
plock_test(uint64_t iter, enum plock_use_type utype)
{
	int32_t rc;
	uint32_t i, lc, n;
	uint64_t *sum;
	struct plock_test *pt;
	struct lcore_plock_test *lpt;

	/* init phase, allocate and initialize shared data */

	n = rte_lcore_count();
	pt = calloc(n + 1, sizeof(*pt));	/* shared, lock-protected counters */
	lpt = calloc(n, sizeof(*lpt));		/* one descriptor per lcore */
	sum = calloc(n + 1, sizeof(*sum));	/* expected totals, from local copies */

	printf("%s(iter=%" PRIu64 ", utype=%u) started on %u lcores\n",
		__func__, iter, utype, n);

	if (pt == NULL || lpt == NULL || sum == NULL) {
		printf("%s: failed to allocate memory for %u lcores\n",
			__func__, n);
		free(pt);
		free(lpt);
		free(sum);
		return -ENOMEM;
	}

	/* every lock uses the same barrier flavour under test */
	for (i = 0; i != n + 1; i++)
		plock_reset(&pt[i].lock, utype);

	/* lcore slot i shares pt[i] with its left and pt[i + 1] with its right */
	i = 0;
	RTE_LCORE_FOREACH(lc) {

		lpt[i].lc = lc;
		lpt[i].iter = iter;
		lpt[i].pt[0] = pt + i;
		lpt[i].pt[1] = pt + i + 1;
		i++;
	}

	/* close the ring: the last lcore's right neighbour wraps to pt[0] */
	lpt[i - 1].pt[1] = pt;

	for (i = 0; i != n; i++)
		printf("lpt[%u]={lc=%u, pt={%p, %p},};\n",
			i, lpt[i].lc, lpt[i].pt[0], lpt[i].pt[1]);


	/* test phase - start and wait for completion on each active lcore */

	rte_eal_mp_remote_launch(plock_test1_lcore, lpt, CALL_MAIN);
	rte_eal_mp_wait_lcore();

	/* validation phase - make sure that shared and local data match */

	/* rebuild each shared counter's expected value from the local copies */
	for (i = 0; i != n; i++) {
		sum[i] += lpt[i].sum[0];
		sum[i + 1] += lpt[i].sum[1];
	}

	/* i == n here: fold the wrap-around slot back into pt[0]'s total */
	sum[0] += sum[i];

	rc = 0;
	for (i = 0; i != n; i++) {
		printf("%s: sum[%u]=%" PRIu64 ", pt[%u].val=%" PRIu64 ", pt[%u].iter=%" PRIu64 ";\n",
			__func__, i, sum[i], i, pt[i].val, i, pt[i].iter);

		/* race condition occurred, lock doesn't work properly */
		/* each shared struct is updated iter times by each of its two users */
		if (sum[i] != pt[i].val || 2 * iter != pt[i].iter) {
			printf("error: local and shared sums don't match\n");
			rc = -1;
		}
	}

	free(pt);
	free(lpt);
	free(sum);

	printf("%s(utype=%u) returns %d\n", __func__, utype, rc);
	return rc;
}
269a9de470cSBruce Richardson
270a9de470cSBruce Richardson static int
test_barrier(void)271a9de470cSBruce Richardson test_barrier(void)
272a9de470cSBruce Richardson {
273a9de470cSBruce Richardson int32_t i, ret, rc[USE_NUM];
274a9de470cSBruce Richardson
275a9de470cSBruce Richardson for (i = 0; i != RTE_DIM(rc); i++)
276a9de470cSBruce Richardson rc[i] = plock_test(ITER_MAX, i);
277a9de470cSBruce Richardson
278a9de470cSBruce Richardson ret = 0;
279a9de470cSBruce Richardson for (i = 0; i != RTE_DIM(rc); i++) {
280a9de470cSBruce Richardson printf("%s for utype=%d %s\n",
281a9de470cSBruce Richardson __func__, i, rc[i] == 0 ? "passed" : "failed");
282a9de470cSBruce Richardson ret |= rc[i];
283a9de470cSBruce Richardson }
284a9de470cSBruce Richardson
285a9de470cSBruce Richardson return ret;
286a9de470cSBruce Richardson }
287a9de470cSBruce Richardson
/* Register with the DPDK test harness as "barrier_autotest" (perf category). */
REGISTER_PERF_TEST(barrier_autotest, test_barrier);
289