/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

 /*
  * This is a simple functional test for the rte_smp_mb() implementation,
  * i.e. it makes sure that LOAD and STORE operations that precede the
  * rte_smp_mb() call are globally visible across the lcores
  * before the LOAD and STORE operations that follow it.
  * The test uses a simple implementation of Peterson's lock algorithm
  * (https://en.wikipedia.org/wiki/Peterson%27s_algorithm)
  * for two execution units to make sure that rte_smp_mb() prevents
  * store-load reordering.
  * When executed on a single lcore, it can also be used as an approximate
  * estimate of the number of cycles a particular implementation of
  * rte_smp_mb() will take.
  */
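
/*
 * For illustration, the store-load reordering that rte_smp_mb() must
 * prevent is the classic case below, with x = y = 0 initially:
 *
 *	lcore 0:		lcore 1:
 *	x = 1;			y = 1;
 *	r0 = y;			r1 = x;
 *
 * Without a full barrier between each store and the following load,
 * both r0 and r1 may read 0, because the stores can still sit in the
 * CPUs' store buffers while the loads execute.
 */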

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>

#include <rte_memory.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_pause.h>
#include <rte_random.h>
#include <rte_cycles.h>
#include <rte_vect.h>
#include <rte_debug.h>

#include "test.h"

#define ADD_MAX		8
#define ITER_MAX	0x1000000

enum plock_use_type {
	USE_MB,		/* exercise rte_mb() */
	USE_SMP_MB,	/* exercise rte_smp_mb() */
	USE_NUM
};

struct plock {
	volatile uint32_t flag[2];	/* per-contender "want the lock" flags */
	volatile uint32_t victim;	/* contender that yields on a conflict */
	enum plock_use_type utype;	/* which barrier flavour to use */
};

/*
 * Lock plus two counters protected by it.
 */
struct plock_test {
	struct plock lock;
	uint64_t val;
	uint64_t iter;
};

/*
 * Each active lcore shares a plock_test struct with its left and right
 * neighbours.
 */
struct lcore_plock_test {
	struct plock_test *pt[2]; /* shared, lock-protected data */
	uint64_t sum[2];          /* local copy of the shared data */
	uint64_t iter;            /* number of iterations to perform */
	uint32_t lc;              /* given lcore id */
};

static inline void
store_load_barrier(uint32_t utype)
{
	if (utype == USE_MB)
		rte_mb();
	else if (utype == USE_SMP_MB)
		rte_smp_mb();
	else
		RTE_VERIFY(0);
}
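
/*
 * Note on the two flavours: rte_mb() is the full barrier that also
 * orders accesses to device memory, while rte_smp_mb() only has to
 * order memory accesses between lcores and may therefore map to a
 * cheaper instruction sequence on some architectures. Both provide
 * the store-load ordering this test relies on.
 */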

/*
 * Peterson lock implementation.
 */
static void
plock_lock(struct plock *l, uint32_t self)
{
	uint32_t other;

	other = self ^ 1;

	/* announce interest in the lock */
	l->flag[self] = 1;
	rte_smp_wmb();
	/* let the other contender go first on a conflict */
	l->victim = self;

	/* the barrier under test: the stores above must be globally
	 * visible before the loads below are performed.
	 */
	store_load_barrier(l->utype);

	/* wait while the other contender wants the lock and we are the victim */
	while (l->flag[other] == 1 && l->victim == self)
		rte_pause();
	rte_smp_rmb();
}
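
/*
 * Why a store-load barrier is needed above (a sketch): if the CPU let
 * the load of flag[other] overtake the stores to flag[self] and victim,
 * both contenders could observe flag[other] == 0, skip the wait loop
 * and enter the critical section at the same time.
 */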

static void
plock_unlock(struct plock *l, uint32_t self)
{
	/* make the critical-section stores visible before releasing the lock */
	rte_smp_wmb();
	l->flag[self] = 0;
}

static void
plock_reset(struct plock *l, enum plock_use_type utype)
{
	memset(l, 0, sizeof(*l));
	l->utype = utype;
}

/*
 * grab the lock, update both counters, release the lock.
 */
static void
plock_add(struct plock_test *pt, uint32_t self, uint32_t n)
{
	plock_lock(&pt->lock, self);
	pt->iter++;
	pt->val += n;
	plock_unlock(&pt->lock, self);
}

static int
plock_test1_lcore(void *data)
{
	uint64_t tm;
	uint32_t lc, ln;
	uint64_t i, n;
	struct lcore_plock_test *lpt;

	lpt = data;
	lc = rte_lcore_id();

	/* find the lcore_plock_test struct for the given lcore */
	for (ln = rte_lcore_count(); ln != 0 && lpt->lc != lc; lpt++, ln--)
		;

	if (ln == 0) {
		printf("%s(%u) error at init\n", __func__, lc);
		return -1;
	}

	n = rte_rand() % ADD_MAX;
	tm = rte_get_timer_cycles();

	/*
	 * for each iteration:
	 * - update the shared, lock-protected data in a safe manner
	 * - update the local copy of the shared data
	 */
	for (i = 0; i != lpt->iter; i++) {

		/* this lcore is contender 0 on pt[0] and contender 1 on pt[1] */
		plock_add(lpt->pt[0], 0, n);
		plock_add(lpt->pt[1], 1, n);

		lpt->sum[0] += n;
		lpt->sum[1] += n;

		n = (n + 1) % ADD_MAX;
	}

	tm = rte_get_timer_cycles() - tm;

	printf("%s(%u): %" PRIu64 " iterations finished, in %" PRIu64
		" cycles, %#Lf cycles/iteration, "
		"local sum={%" PRIu64 ", %" PRIu64 "}\n",
		__func__, lc, i, tm, (long double)tm / i,
		lpt->sum[0], lpt->sum[1]);
	return 0;
}

/*
 * For N active lcores we allocate N + 1 plock_test structures.
 * Each active lcore shares one plock_test structure with its
 * left lcore neighbour and one plock_test structure with its
 * right lcore neighbour.
 * During the test each lcore updates the data in both shared structures
 * and in its local copies. Then, at the validation phase, we check that
 * the shared and local data match.
 */
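
/*
 * For example, with three active lcores (A, B, C) the sharing pattern is:
 *
 *	pt[0] -- A -- pt[1] -- B -- pt[2] -- C -- pt[0] (wrap-around)
 *
 * so each shared plock_test is contended by exactly two lcores, one
 * acting as Peterson contender 0 and the other as contender 1, while
 * the last allocated plock_test (pt[3] here) is left unused.
 */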
static int
plock_test(uint64_t iter, enum plock_use_type utype)
{
	int32_t rc;
	uint32_t i, lc, n;
	uint64_t *sum;
	struct plock_test *pt;
	struct lcore_plock_test *lpt;

	/* init phase, allocate and initialize shared data */

	n = rte_lcore_count();
	pt = calloc(n + 1, sizeof(*pt));
	lpt = calloc(n, sizeof(*lpt));
	sum = calloc(n + 1, sizeof(*sum));

	printf("%s(iter=%" PRIu64 ", utype=%u) started on %u lcores\n",
		__func__, iter, utype, n);

	if (pt == NULL || lpt == NULL || sum == NULL) {
		printf("%s: failed to allocate memory for %u lcores\n",
			__func__, n);
		free(pt);
		free(lpt);
		free(sum);
		return -ENOMEM;
	}

	for (i = 0; i != n + 1; i++)
		plock_reset(&pt[i].lock, utype);

	i = 0;
	RTE_LCORE_FOREACH(lc) {

		lpt[i].lc = lc;
		lpt[i].iter = iter;
		lpt[i].pt[0] = pt + i;
		lpt[i].pt[1] = pt + i + 1;
		i++;
	}

	/* wrap around: the last lcore shares pt[0] with the first one */
	lpt[i - 1].pt[1] = pt;

	for (i = 0; i != n; i++)
		printf("lpt[%u]={lc=%u, pt={%p, %p},};\n",
			i, lpt[i].lc, lpt[i].pt[0], lpt[i].pt[1]);

	/* test phase - start and wait for completion on each active lcore */

	rte_eal_mp_remote_launch(plock_test1_lcore, lpt, CALL_MAIN);
	rte_eal_mp_wait_lcore();

	/* validation phase - make sure that shared and local data match */

	for (i = 0; i != n; i++) {
		sum[i] += lpt[i].sum[0];
		sum[i + 1] += lpt[i].sum[1];
	}

	/* i == n here: fold the wrap-around contribution back into sum[0] */
	sum[0] += sum[i];

	rc = 0;
	for (i = 0; i != n; i++) {
		printf("%s: sum[%u]=%" PRIu64 ", pt[%u].val=%" PRIu64 ", pt[%u].iter=%" PRIu64 ";\n",
			__func__, i, sum[i], i, pt[i].val, i, pt[i].iter);

		/* a race occurred - the lock doesn't work properly;
		 * each pt[] is updated by two lcores, iter times each.
		 */
		if (sum[i] != pt[i].val || 2 * iter != pt[i].iter) {
			printf("error: local and shared sums don't match\n");
			rc = -1;
		}
	}

	free(pt);
	free(lpt);
	free(sum);

	printf("%s(utype=%u) returns %d\n", __func__, utype, rc);
	return rc;
}

static int
test_barrier(void)
{
	int32_t i, ret, rc[USE_NUM];

	for (i = 0; i != RTE_DIM(rc); i++)
		rc[i] = plock_test(ITER_MAX, i);

	ret = 0;
	for (i = 0; i != RTE_DIM(rc); i++) {
		printf("%s for utype=%d %s\n",
			__func__, i, rc[i] == 0 ? "passed" : "failed");
		ret |= rc[i];
	}

	return ret;
}

REGISTER_PERF_TEST(barrier_autotest, test_barrier);
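
/*
 * Usage sketch, assuming the standard dpdk-test binary: the test is
 * registered as a perf test and can be invoked from the interactive
 * prompt:
 *
 *	RTE>> barrier_autotest
 */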