/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <unistd.h>
#include <sys/queue.h>
#include <string.h>

#include <rte_common.h>
#include <rte_memory.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_rwlock.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_cycles.h>

#include "test.h"

/*
 * rwlock test
 * ===========
 * Provides UT for the rte_rwlock API.
 * The main concern is functional testing, but some performance
 * measurements are provided as well.
 * Obviously, for proper testing it needs to be executed with more than
 * one lcore.
 */

#define ITER_NUM	0x80

#define TEST_SEC	5

static rte_rwlock_t sl;
static rte_rwlock_t sl_tab[RTE_MAX_LCORE];
static RTE_ATOMIC(uint32_t) synchro;

enum {
	LC_TYPE_RDLOCK,
	LC_TYPE_WRLOCK,
};

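/*
 * Shared state for the trylock tests: a lock, the test duration in TSC
 * cycles (tick) and one cache line of data that readers expect to find
 * zeroed while writers temporarily fill it with a per-lcore value.
 */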
static alignas(RTE_CACHE_LINE_SIZE) struct {
	rte_rwlock_t lock;
	uint64_t tick;

	volatile union {
		uint8_t u8[RTE_CACHE_LINE_SIZE];
		uint64_t u64[RTE_CACHE_LINE_SIZE / sizeof(uint64_t)];
	} data;
} try_rwlock_data;

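/* per-lcore status and success/fail counters for the trylock tests */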
struct __rte_cache_aligned try_rwlock_lcore {
	int32_t rc;
	int32_t type;
	struct {
		uint64_t tick;
		uint64_t fail;
		uint64_t success;
	} stat;
};

static struct try_rwlock_lcore try_lcore_data[RTE_MAX_LCORE];

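/* worker body for rwlock_test1(): exercise the global and per-lcore locks */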
static int
test_rwlock_per_core(__rte_unused void *arg)
{
	rte_rwlock_write_lock(&sl);
	printf("Global write lock taken on core %u\n", rte_lcore_id());
	rte_rwlock_write_unlock(&sl);

	rte_rwlock_write_lock(&sl_tab[rte_lcore_id()]);
	printf("Hello from core %u !\n", rte_lcore_id());
	rte_rwlock_write_unlock(&sl_tab[rte_lcore_id()]);

	rte_rwlock_read_lock(&sl);
	printf("Global read lock taken on core %u\n", rte_lcore_id());
	rte_delay_ms(100);
	printf("Release global read lock on core %u\n", rte_lcore_id());
	rte_rwlock_read_unlock(&sl);

	return 0;
}

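/* lock, shared counter and per-lcore timings used by the perf test */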
static rte_rwlock_t lk = RTE_RWLOCK_INITIALIZER;
static volatile uint64_t rwlock_data;
static uint64_t time_count[RTE_MAX_LCORE] = {0};

#define MAX_LOOP 10000
#define TEST_RWLOCK_DEBUG 0

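/*
 * Perf test body run on each lcore: MAX_LOOP iterations of a write
 * lock/unlock followed by a read lock/unlock, timed with the TSC.
 */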
static int
load_loop_fn(__rte_unused void *arg)
{
	uint64_t time_diff = 0, begin;
	uint64_t hz = rte_get_timer_hz();
	uint64_t lcount = 0;
	const unsigned int lcore = rte_lcore_id();

	/* wait synchro for workers */
	if (lcore != rte_get_main_lcore())
		rte_wait_until_equal_32((uint32_t *)(uintptr_t)&synchro, 1,
				rte_memory_order_relaxed);

	begin = rte_rdtsc_precise();
	while (lcount < MAX_LOOP) {
		rte_rwlock_write_lock(&lk);
		++rwlock_data;
		rte_rwlock_write_unlock(&lk);

		rte_rwlock_read_lock(&lk);
		if (TEST_RWLOCK_DEBUG && !(lcount % 100))
			printf("Core [%u] rwlock_data = %"PRIu64"\n",
				lcore, rwlock_data);
		rte_rwlock_read_unlock(&lk);

		lcount++;
		/* delay to make lock duty cycle slightly realistic */
		rte_pause();
	}

	time_diff = rte_rdtsc_precise() - begin;
	time_count[lcore] = time_diff * 1000000 / hz;
	return 0;
}

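/* launch load_loop_fn() on all lcores and report per-core and total times */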
static int
test_rwlock_perf(void)
{
	unsigned int i;
	uint64_t total = 0;

	printf("\nRwlock Perf Test on %u cores...\n", rte_lcore_count());

	/* clear synchro and start workers */
	rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
	if (rte_eal_mp_remote_launch(load_loop_fn, NULL, SKIP_MAIN) < 0)
		return -1;

	/* start synchro and launch test on main */
	rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
	load_loop_fn(NULL);

	rte_eal_mp_wait_lcore();

	RTE_LCORE_FOREACH(i) {
		printf("Core [%u] cost time = %"PRIu64" us\n",
			i, time_count[i]);
		total += time_count[i];
	}

	printf("Total cost time = %"PRIu64" us\n", total);
	memset(time_count, 0, sizeof(time_count));

	return 0;
}

/*
 * - There is a global rwlock and a table of rwlocks (one per lcore).
 *
 * - The test function takes all of these locks and launches the
 *   ``test_rwlock_per_core()`` function on each core (except the main).
 *
 *   - The function takes the global write lock, displays something,
 *     then releases the global lock.
 *   - Then, it takes the per-lcore write lock, displays something, and
 *     releases the per-lcore lock.
 *   - Finally, a read lock is taken for 100 ms, then released.
 *
 * - The main function unlocks the per-lcore locks sequentially and
 *   waits between each lock. This triggers the display of a message
 *   for each core, in the correct order.
 *
 *   Then, it tries to take the global write lock and displays the last
 *   message. The autotest script checks that the message order is correct.
 */
static int
rwlock_test1(void)
{
	int i;

	rte_rwlock_init(&sl);
	for (i = 0; i < RTE_MAX_LCORE; i++)
		rte_rwlock_init(&sl_tab[i]);

	rte_rwlock_write_lock(&sl);

	RTE_LCORE_FOREACH_WORKER(i) {
		rte_rwlock_write_lock(&sl_tab[i]);
		rte_eal_remote_launch(test_rwlock_per_core, NULL, i);
	}

	rte_rwlock_write_unlock(&sl);

	RTE_LCORE_FOREACH_WORKER(i) {
		rte_rwlock_write_unlock(&sl_tab[i]);
		rte_delay_ms(100);
	}

	rte_rwlock_write_lock(&sl);
	/* this message should be the last message of test */
	printf("Global write lock taken on main core %u\n", rte_lcore_id());
	rte_rwlock_write_unlock(&sl);

	rte_eal_mp_wait_lcore();

	if (test_rwlock_perf() < 0)
		return -1;

	return 0;
}

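/*
 * Take a read trylock and verify that no writer left a non-zero value
 * in the shared data.
 */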
static int
try_read(uint32_t lc)
{
	int32_t rc;
	uint32_t i;

	rc = rte_rwlock_read_trylock(&try_rwlock_data.lock);
	if (rc != 0)
		return rc;

	for (i = 0; i != RTE_DIM(try_rwlock_data.data.u64); i++) {

		/* race condition occurred, lock doesn't work properly */
		if (try_rwlock_data.data.u64[i] != 0) {
			printf("%s(%u) error: unexpected data pattern\n",
				__func__, lc);
			rte_memdump(stdout, NULL,
				(void *)(uintptr_t)&try_rwlock_data.data,
				sizeof(try_rwlock_data.data));
			rc = -EFAULT;
			break;
		}
	}

	rte_rwlock_read_unlock(&try_rwlock_data.lock);
	return rc;
}

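/*
 * Take a write trylock, fill the shared data with a per-lcore value and
 * then restore it to zero, checking for unexpected values on the way.
 */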
static int
try_write(uint32_t lc)
{
	int32_t rc;
	uint32_t i, v;

	v = RTE_MAX(lc % UINT8_MAX, 1U);

	rc = rte_rwlock_write_trylock(&try_rwlock_data.lock);
	if (rc != 0)
		return rc;

	/* update by bytes in reverse order */
	for (i = RTE_DIM(try_rwlock_data.data.u8); i-- != 0; ) {

		/* race condition occurred, lock doesn't work properly */
		if (try_rwlock_data.data.u8[i] != 0) {
			printf("%s:%d(%u) error: unexpected data pattern\n",
				__func__, __LINE__, lc);
			rte_memdump(stdout, NULL,
				(void *)(uintptr_t)&try_rwlock_data.data,
				sizeof(try_rwlock_data.data));
			rc = -EFAULT;
			break;
		}

		try_rwlock_data.data.u8[i] = v;
	}

	/* restore by bytes in reverse order */
	for (i = RTE_DIM(try_rwlock_data.data.u8); i-- != 0; ) {

		/* race condition occurred, lock doesn't work properly */
		if (try_rwlock_data.data.u8[i] != v) {
			printf("%s:%d(%u) error: unexpected data pattern\n",
				__func__, __LINE__, lc);
			rte_memdump(stdout, NULL,
				(void *)(uintptr_t)&try_rwlock_data.data,
				sizeof(try_rwlock_data.data));
			rc = -EFAULT;
			break;
		}

		try_rwlock_data.data.u8[i] = 0;
	}

	rte_rwlock_write_unlock(&try_rwlock_data.lock);
	return rc;
}

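/*
 * Reader loop: keep calling try_read() until the configured duration
 * expires, counting successful locks and -EBUSY failures.
 */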
static int
try_read_lcore(__rte_unused void *data)
{
	int32_t rc;
	uint32_t i, lc;
	uint64_t ftm, stm, tm;
	struct try_rwlock_lcore *lcd;

	lc = rte_lcore_id();
	lcd = try_lcore_data + lc;
	lcd->type = LC_TYPE_RDLOCK;

	ftm = try_rwlock_data.tick;
	stm = rte_get_timer_cycles();

	do {
		for (i = 0; i != ITER_NUM; i++) {
			rc = try_read(lc);
			if (rc == 0)
				lcd->stat.success++;
			else if (rc == -EBUSY)
				lcd->stat.fail++;
			else
				break;
			rc = 0;
		}
		tm = rte_get_timer_cycles() - stm;
	} while (tm < ftm && rc == 0);

	lcd->rc = rc;
	lcd->stat.tick = tm;
	return rc;
}

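/*
 * Writer loop: keep calling try_write() until the configured duration
 * expires, counting successful locks and -EBUSY failures.
 */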
static int
try_write_lcore(__rte_unused void *data)
{
	int32_t rc;
	uint32_t i, lc;
	uint64_t ftm, stm, tm;
	struct try_rwlock_lcore *lcd;

	lc = rte_lcore_id();
	lcd = try_lcore_data + lc;
	lcd->type = LC_TYPE_WRLOCK;

	ftm = try_rwlock_data.tick;
	stm = rte_get_timer_cycles();

	do {
		for (i = 0; i != ITER_NUM; i++) {
			rc = try_write(lc);
			if (rc == 0)
				lcd->stat.success++;
			else if (rc == -EBUSY)
				lcd->stat.fail++;
			else
				break;
			rc = 0;
		}
		tm = rte_get_timer_cycles() - stm;
	} while (tm < ftm && rc == 0);

	lcd->rc = rc;
	lcd->stat.tick = tm;
	return rc;
}

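/* print the counters of one lcore (or of an aggregate) in readable form */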
static void
print_try_lcore_stats(const struct try_rwlock_lcore *tlc, uint32_t lc)
{
	uint64_t f, s;

	f = RTE_MAX(tlc->stat.fail, 1ULL);
	s = RTE_MAX(tlc->stat.success, 1ULL);

	printf("try_lcore_data[%u]={\n"
		"\trc=%d,\n"
		"\ttype=%s,\n"
		"\tfail=%" PRIu64 ",\n"
		"\tsuccess=%" PRIu64 ",\n"
		"\tcycles=%" PRIu64 ",\n"
		"\tcycles/op=%#Lf,\n"
		"\tcycles/success=%#Lf,\n"
		"\tsuccess/fail=%#Lf,\n"
		"};\n",
		lc,
		tlc->rc,
		tlc->type == LC_TYPE_RDLOCK ? "RDLOCK" : "WRLOCK",
		tlc->stat.fail,
		tlc->stat.success,
		tlc->stat.tick,
		(long double)tlc->stat.tick /
		(tlc->stat.fail + tlc->stat.success),
		(long double)tlc->stat.tick / s,
		(long double)tlc->stat.success / f);
}

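/* accumulate the counters of one lcore into an aggregate entry */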
static void
collect_try_lcore_stats(struct try_rwlock_lcore *tlc,
	const struct try_rwlock_lcore *lc)
{
	tlc->stat.tick += lc->stat.tick;
	tlc->stat.fail += lc->stat.fail;
	tlc->stat.success += lc->stat.success;
}

/*
 * Process collected results:
 *  - check status
 *  - collect and print statistics
 */
static int
process_try_lcore_stats(void)
{
	int32_t rc;
	uint32_t lc, rd, wr;
	struct try_rwlock_lcore rlc, wlc;

	memset(&rlc, 0, sizeof(rlc));
	memset(&wlc, 0, sizeof(wlc));

	rlc.type = LC_TYPE_RDLOCK;
	wlc.type = LC_TYPE_WRLOCK;
	rd = 0;
	wr = 0;

	rc = 0;
	RTE_LCORE_FOREACH(lc) {
		rc |= try_lcore_data[lc].rc;
		if (try_lcore_data[lc].type == LC_TYPE_RDLOCK) {
			collect_try_lcore_stats(&rlc, try_lcore_data + lc);
			rd++;
		} else {
			collect_try_lcore_stats(&wlc, try_lcore_data + lc);
			wr++;
		}
	}

	if (rc == 0) {
		RTE_LCORE_FOREACH(lc)
			print_try_lcore_stats(try_lcore_data + lc, lc);

		if (rd != 0) {
			printf("aggregated stats for %u RDLOCK cores:\n", rd);
			print_try_lcore_stats(&rlc, rd);
		}

		if (wr != 0) {
			printf("aggregated stats for %u WRLOCK cores:\n", wr);
			print_try_lcore_stats(&wlc, wr);
		}
	}

	return rc;
}

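/* reset all counters and shared data, set the test duration in TSC cycles */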
static void
try_test_reset(void)
{
	memset(&try_lcore_data, 0, sizeof(try_lcore_data));
	memset(&try_rwlock_data, 0, sizeof(try_rwlock_data));
	try_rwlock_data.tick = TEST_SEC * rte_get_tsc_hz();
}

/* all lcores grab RDLOCK */
static int
try_rwlock_test_rda(void)
{
	try_test_reset();

	/* start read test on all available lcores */
	rte_eal_mp_remote_launch(try_read_lcore, NULL, CALL_MAIN);
	rte_eal_mp_wait_lcore();

	return process_try_lcore_stats();
}

/* all worker lcores grab RDLOCK, main one grabs WRLOCK */
static int
try_rwlock_test_rds_wrm(void)
{
	try_test_reset();

	rte_eal_mp_remote_launch(try_read_lcore, NULL, SKIP_MAIN);
	try_write_lcore(NULL);
	rte_eal_mp_wait_lcore();

	return process_try_lcore_stats();
}

/* main and even worker lcores grab RDLOCK, odd lcores grab WRLOCK */
static int
try_rwlock_test_rde_wro(void)
{
	uint32_t lc, mlc;

	try_test_reset();

	mlc = rte_get_main_lcore();

	RTE_LCORE_FOREACH(lc) {
		if (lc != mlc) {
			if ((lc & 1) == 0)
				rte_eal_remote_launch(try_read_lcore,
						NULL, lc);
			else
				rte_eal_remote_launch(try_write_lcore,
						NULL, lc);
		}
	}
	try_read_lcore(NULL);
	rte_eal_mp_wait_lcore();

	return process_try_lcore_stats();
}

REGISTER_FAST_TEST(rwlock_test1_autotest, true, true, rwlock_test1);
REGISTER_FAST_TEST(rwlock_rda_autotest, true, true, try_rwlock_test_rda);
REGISTER_FAST_TEST(rwlock_rds_wrm_autotest, true, true, try_rwlock_test_rds_wrm);
REGISTER_FAST_TEST(rwlock_rde_wro_autotest, true, true, try_rwlock_test_rde_wro);