/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 * Copyright(c) 2019 Arm Limited
 */

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/queue.h>

#include <rte_memory.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_random.h>
#include <rte_hash_crc.h>

#include "test.h"

/*
 * Atomic Variables
 * ================
 *
 * - The main test function performs several subtests. The first
 *   checks that the usual inc/dec/add/sub functions are working
 *   correctly:
 *
 *   - Initialize 16-bit, 32-bit and 64-bit atomic variables to specific
 *     values.
 *
 *   - These variables are incremented and decremented on each core at
 *     the same time in ``test_atomic_usual()``.
 *
 *   - The function checks that once all lcores finish their function,
 *     the values of the atomic variables are still the same.
 *
 * - Test "test and set" functions.
 *
 *   - Initialize 16-bit, 32-bit and 64-bit atomic variables to zero.
 *
 *   - Invoke ``test_atomic_tas()`` on each lcore. Before doing anything
 *     else, the cores wait on a synchro using ``while
 *     (rte_atomic32_read(&val) == 0)``, which is triggered by the main test
 *     function. Then all cores do a
 *     ``rte_atomicXX_test_and_set()`` at the same time. If it is successful,
 *     they increment another atomic counter.
 *
 *   - The main function checks that the atomic counter was incremented
 *     three times only (once for the 16-bit, once for the 32-bit and once
 *     for the 64-bit variable).
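 *
 *   - Each lcore roughly follows this pattern (a sketch, with ``XX``
 *     standing for 16, 32 or 64 and ``var`` for the matching atomic
 *     variable)::
 *
 *       while (rte_atomic32_read(&synchro) == 0)
 *           ;
 *       if (rte_atomicXX_test_and_set(&var))
 *           rte_atomic64_inc(&count);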
 *
 * - Test "add/sub and return" functions
 *
 *   - Initialize 16-bit, 32-bit and 64-bit atomic variables to zero.
 *
 *   - Invoke ``test_atomic_addsub_and_return()`` on each lcore. Before doing
 *     anything else, the cores wait on a synchro. Each lcore does
 *     this operation several times::
 *
 *       tmp = rte_atomicXX_add_return(&a, 1);
 *       atomic_add(&count, tmp);
 *       tmp = rte_atomicXX_sub_return(&a, 1);
 *       atomic_sub(&count, tmp+1);
 *
 *   - At the end of the test, the *count* value must be 0.
 *
 * - Test "128-bit compare and swap" (aarch64 and x86_64 only)
 *
 *   - Initialize 128-bit atomic variables to zero.
 *
 *   - Invoke ``test_atomic128_cmp_exchange()`` on each lcore. Before doing
 *     anything else, the cores wait on a synchro. Each lcore does
 *     these compare and swap (CAS) operations several times::
 *
 *       Acquire CAS: update counter.val[0] + 2; counter.val[1] + 1;
 *       Release CAS: update counter.val[0] + 2; counter.val[1] + 1;
 *       Acquire-Release CAS: update counter.val[0] + 2; counter.val[1] + 1;
 *       Relaxed CAS: update counter.val[0] + 2; counter.val[1] + 1;
 *
 *   - At the end of the test, the first 64-bit value and the second
 *     64-bit value of *count128* must differ by the total number of
 *     iterations.
 *
 * - Test "atomic exchange" functions
 *
 *   - Create a 64-bit token that can be tested for data integrity
 *
 *   - Invoke ``test_atomic_exchange`` on each lcore.  Before doing
 *     anything else, the cores wait for a synchronization event.
 *     Each core then does the following for N iterations:
 *
 *       Generate a new token with a data integrity check
 *       Exchange the new token for the previously generated token
 *       Increment a counter if a corrupt token was received
 *
 *   - At the end of the test, the number of corrupted tokens must be 0.
 */

#define NUM_ATOMIC_TYPES 3

#define N 1000000

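/*
 * Shared atomic variables exercised by the worker lcores: "count" collects
 * per-lcore results and "synchro" is the start flag that the main lcore sets
 * to release all workers at the same time.
 */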
static rte_atomic16_t a16;
static rte_atomic32_t a32;
static rte_atomic64_t a64;
static rte_atomic64_t count;
static rte_atomic32_t synchro;

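/*
 * Worker routine for the usual inc/dec/add/sub test: each lcore waits for
 * the synchro flag, then performs N increments, N decrements, N/5 additions
 * of 5 and N/5 subtractions of 5 on each atomic variable, which should leave
 * the original values unchanged.
 */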
static int
test_atomic_usual(__rte_unused void *arg)
{
	unsigned i;

	while (rte_atomic32_read(&synchro) == 0)
		;

	for (i = 0; i < N; i++)
		rte_atomic16_inc(&a16);
	for (i = 0; i < N; i++)
		rte_atomic16_dec(&a16);
	for (i = 0; i < (N / 5); i++)
		rte_atomic16_add(&a16, 5);
	for (i = 0; i < (N / 5); i++)
		rte_atomic16_sub(&a16, 5);

	for (i = 0; i < N; i++)
		rte_atomic32_inc(&a32);
	for (i = 0; i < N; i++)
		rte_atomic32_dec(&a32);
	for (i = 0; i < (N / 5); i++)
		rte_atomic32_add(&a32, 5);
	for (i = 0; i < (N / 5); i++)
		rte_atomic32_sub(&a32, 5);

	for (i = 0; i < N; i++)
		rte_atomic64_inc(&a64);
	for (i = 0; i < N; i++)
		rte_atomic64_dec(&a64);
	for (i = 0; i < (N / 5); i++)
		rte_atomic64_add(&a64, 5);
	for (i = 0; i < (N / 5); i++)
		rte_atomic64_sub(&a64, 5);

	return 0;
}

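/*
 * Worker routine for the test-and-set test: for each width, only the first
 * lcore whose rte_atomicXX_test_and_set() succeeds increments "count", so
 * the final value of "count" must equal NUM_ATOMIC_TYPES.
 */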
static int
test_atomic_tas(__rte_unused void *arg)
{
	while (rte_atomic32_read(&synchro) == 0)
		;

	if (rte_atomic16_test_and_set(&a16))
		rte_atomic64_inc(&count);
	if (rte_atomic32_test_and_set(&a32))
		rte_atomic64_inc(&count);
	if (rte_atomic64_test_and_set(&a64))
		rte_atomic64_inc(&count);

	return 0;
}

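/*
 * Worker routine for the add/sub-and-return test: the values returned by
 * paired add_return/sub_return calls are accumulated into "count"; if the
 * returned values are consistent, the contributions cancel out and "count"
 * must be 0 at the end of the test.
 */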
static int
test_atomic_addsub_and_return(__rte_unused void *arg)
{
	uint32_t tmp16;
	uint32_t tmp32;
	uint64_t tmp64;
	unsigned i;

	while (rte_atomic32_read(&synchro) == 0)
		;

	for (i = 0; i < N; i++) {
		tmp16 = rte_atomic16_add_return(&a16, 1);
		rte_atomic64_add(&count, tmp16);

		tmp16 = rte_atomic16_sub_return(&a16, 1);
		rte_atomic64_sub(&count, tmp16+1);

		tmp32 = rte_atomic32_add_return(&a32, 1);
		rte_atomic64_add(&count, tmp32);

		tmp32 = rte_atomic32_sub_return(&a32, 1);
		rte_atomic64_sub(&count, tmp32+1);

		tmp64 = rte_atomic64_add_return(&a64, 1);
		rte_atomic64_add(&count, tmp64);

		tmp64 = rte_atomic64_sub_return(&a64, 1);
		rte_atomic64_sub(&count, tmp64+1);
	}

	return 0;
}

/*
 * rte_atomic32_inc_and_test() atomically increases a 32-bit counter by one
 * and then tests whether that counter is equal to 0. It returns true if the
 * counter is 0 and false otherwise. rte_atomic16_inc_and_test() and
 * rte_atomic64_inc_and_test() do the same for 16-bit and 64-bit counters.
 * This test checks whether each counter is equal to 0 after being atomically
 * increased by one. If it is, "count" is incremented by one and checked as
 * the result later.
 */
static int
test_atomic_inc_and_test(__rte_unused void *arg)
{
	while (rte_atomic32_read(&synchro) == 0)
		;

	if (rte_atomic16_inc_and_test(&a16)) {
		rte_atomic64_inc(&count);
	}
	if (rte_atomic32_inc_and_test(&a32)) {
		rte_atomic64_inc(&count);
	}
	if (rte_atomic64_inc_and_test(&a64)) {
		rte_atomic64_inc(&count);
	}

	return 0;
}

/*
 * rte_atomicXX_dec_and_test() atomically decreases a 16-, 32- or 64-bit
 * counter by one and then tests whether that counter is equal to 0. It
 * returns true if the counter is 0 and false otherwise.
 * This test checks whether each counter is equal to 0 after being atomically
 * decreased by one. If it is, "count" is incremented by one and checked as
 * the result later.
 */
static int
test_atomic_dec_and_test(__rte_unused void *arg)
{
	while (rte_atomic32_read(&synchro) == 0)
		;

	if (rte_atomic16_dec_and_test(&a16))
		rte_atomic64_inc(&count);

	if (rte_atomic32_dec_and_test(&a32))
		rte_atomic64_inc(&count);

	if (rte_atomic64_dec_and_test(&a64))
		rte_atomic64_inc(&count);

	return 0;
}

#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
static rte_int128_t count128;

/*
 * In this test, rte_atomic128_cmp_exchange() updates the first 64 bits of a
 * 128-bit counter by 2 and the second 64 bits by 1. It returns true if the
 * compare-and-exchange operation is successful.
 * This test repeats the 128-bit compare-and-swap operation for N rounds. In
 * each iteration it runs the compare-and-swap with a different memory model.
 */
static int
test_atomic128_cmp_exchange(__rte_unused void *arg)
{
	rte_int128_t expected;
	int success;
	unsigned int i;

	while (rte_atomic32_read(&synchro) == 0)
		;

	expected = count128;

	for (i = 0; i < N; i++) {
		do {
			rte_int128_t desired;

			desired.val[0] = expected.val[0] + 2;
			desired.val[1] = expected.val[1] + 1;

			success = rte_atomic128_cmp_exchange(&count128,
				&expected, &desired, 1,
				__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
		} while (success == 0);

		do {
			rte_int128_t desired;

			desired.val[0] = expected.val[0] + 2;
			desired.val[1] = expected.val[1] + 1;

			success = rte_atomic128_cmp_exchange(&count128,
					&expected, &desired, 1,
					__ATOMIC_RELEASE, __ATOMIC_RELAXED);
		} while (success == 0);

		do {
			rte_int128_t desired;

			desired.val[0] = expected.val[0] + 2;
			desired.val[1] = expected.val[1] + 1;

			success = rte_atomic128_cmp_exchange(&count128,
					&expected, &desired, 1,
					__ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
		} while (success == 0);

		do {
			rte_int128_t desired;

			desired.val[0] = expected.val[0] + 2;
			desired.val[1] = expected.val[1] + 1;

			success = rte_atomic128_cmp_exchange(&count128,
					&expected, &desired, 1,
					__ATOMIC_RELAXED, __ATOMIC_RELAXED);
		} while (success == 0);
	}

	return 0;
}
#endif

/*
 * Helper definitions/variables/functions for
 * atomic exchange tests
 */
typedef union {
	uint16_t u16;
	uint8_t  u8[2];
} test16_t;

typedef union {
	uint32_t u32;
	uint16_t u16[2];
	uint8_t  u8[4];
} test32_t;

typedef union {
	uint64_t u64;
	uint32_t u32[2];
	uint16_t u16[4];
	uint8_t  u8[8];
} test64_t;

const uint8_t CRC8_POLY = 0x91;
uint8_t crc8_table[256];

volatile uint16_t token16;
volatile uint32_t token32;
volatile uint64_t token64;

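/* Precompute the CRC8 lookup table for the CRC8_POLY polynomial. */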
static void
build_crc8_table(void)
{
	uint8_t val;
	int i, j;

	for (i = 0; i < 256; i++) {
		val = i;
		for (j = 0; j < 8; j++) {
			if (val & 1)
				val ^= CRC8_POLY;
			val >>= 1;
		}
		crc8_table[i] = val;
	}
}

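/* Compute the CRC8 of a message using the precomputed lookup table. */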
static uint8_t
get_crc8(uint8_t *message, int length)
{
	uint8_t crc = 0;
	int i;

	for (i = 0; i < length; i++)
		crc = crc8_table[crc ^ message[i]];
	return crc;
}

/*
 * The atomic exchange test sets up a token in memory and
 * then spins up multiple lcores whose job is to generate
 * new tokens, exchange that new token for the old one held
 * in memory, and then verify that the old token is still
 * valid (i.e. the exchange did not corrupt the token).
 *
 * A token is made up of random data and 8 bits of CRC
 * covering that random data.  The following is an example
 * of a 64-bit token.
 *
 * +------------+------------+
 * | 63      56 | 55       0 |
 * +------------+------------+
 * |    CRC8    |    Data    |
 * +------------+------------+
 */
static int
test_atomic_exchange(__rte_unused void *arg)
{
	int i;
	test16_t nt16, ot16; /* new token, old token */
	test32_t nt32, ot32;
	test64_t nt64, ot64;

	/* Wait until all of the other threads have been dispatched */
	while (rte_atomic32_read(&synchro) == 0)
		;

	/*
	 * Let the battle begin! Every thread attempts to steal the current
	 * token with an atomic exchange operation and install its own newly
	 * generated token. If the old token is valid (i.e. it has the
	 * appropriate CRC8 for its data) then the test iteration has
	 * passed.  If the token is invalid, increment the counter.
	 */
	for (i = 0; i < N; i++) {

		/* Test 64bit Atomic Exchange */
		nt64.u64 = rte_rand();
		nt64.u8[7] = get_crc8(&nt64.u8[0], sizeof(nt64) - 1);
		ot64.u64 = rte_atomic64_exchange(&token64, nt64.u64);
		if (ot64.u8[7] != get_crc8(&ot64.u8[0], sizeof(ot64) - 1))
			rte_atomic64_inc(&count);

		/* Test 32bit Atomic Exchange */
		nt32.u32 = (uint32_t)rte_rand();
		nt32.u8[3] = get_crc8(&nt32.u8[0], sizeof(nt32) - 1);
		ot32.u32 = rte_atomic32_exchange(&token32, nt32.u32);
		if (ot32.u8[3] != get_crc8(&ot32.u8[0], sizeof(ot32) - 1))
			rte_atomic64_inc(&count);

		/* Test 16bit Atomic Exchange */
		nt16.u16 = (uint16_t)rte_rand();
		nt16.u8[1] = get_crc8(&nt16.u8[0], sizeof(nt16) - 1);
		ot16.u16 = rte_atomic16_exchange(&token16, nt16.u16);
		if (ot16.u8[1] != get_crc8(&ot16.u8[0], sizeof(ot16) - 1))
			rte_atomic64_inc(&count);
	}

	return 0;
}
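
/*
 * Main test entry point: runs each subtest on all worker lcores via
 * rte_eal_mp_remote_launch() and checks the resulting counter values.
 */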
static int
test_atomic(void)
{
	rte_atomic16_init(&a16);
	rte_atomic32_init(&a32);
	rte_atomic64_init(&a64);
	rte_atomic64_init(&count);
	rte_atomic32_init(&synchro);

	rte_atomic16_set(&a16, 1UL << 10);
	rte_atomic32_set(&a32, 1UL << 10);
	rte_atomic64_set(&a64, 1ULL << 33);

	printf("usual inc/dec/add/sub functions\n");

	rte_eal_mp_remote_launch(test_atomic_usual, NULL, SKIP_MAIN);
	rte_atomic32_set(&synchro, 1);
	rte_eal_mp_wait_lcore();
	rte_atomic32_set(&synchro, 0);

	if (rte_atomic16_read(&a16) != 1UL << 10) {
		printf("Atomic16 usual functions failed\n");
		return -1;
	}

	if (rte_atomic32_read(&a32) != 1UL << 10) {
		printf("Atomic32 usual functions failed\n");
		return -1;
	}

	if (rte_atomic64_read(&a64) != 1ULL << 33) {
		printf("Atomic64 usual functions failed\n");
		return -1;
	}

	printf("test and set\n");

	rte_atomic64_set(&a64, 0);
	rte_atomic32_set(&a32, 0);
	rte_atomic16_set(&a16, 0);
	rte_atomic64_set(&count, 0);
	rte_eal_mp_remote_launch(test_atomic_tas, NULL, SKIP_MAIN);
	rte_atomic32_set(&synchro, 1);
	rte_eal_mp_wait_lcore();
	rte_atomic32_set(&synchro, 0);

	if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) {
		printf("Atomic test and set failed\n");
		return -1;
	}

	printf("add/sub and return\n");

	rte_atomic64_set(&a64, 0);
	rte_atomic32_set(&a32, 0);
	rte_atomic16_set(&a16, 0);
	rte_atomic64_set(&count, 0);
	rte_eal_mp_remote_launch(test_atomic_addsub_and_return, NULL,
				 SKIP_MAIN);
	rte_atomic32_set(&synchro, 1);
	rte_eal_mp_wait_lcore();
	rte_atomic32_set(&synchro, 0);

	if (rte_atomic64_read(&count) != 0) {
		printf("Atomic add/sub+return failed\n");
		return -1;
	}

	/*
	 * Set a64, a32 and a16 to minus the number of worker lcores, then
	 * launch all worker lcores to atomically increment them by one and
	 * test them respectively.
	 * Each lcore increments a64 by one exactly once and then checks
	 * whether it is equal to 0; only one lcore can observe it as 0.
	 * The same applies to a32 and a16.
	 * The variable "count", initialized to zero, is incremented by one
	 * whenever a64, a32 or a16 is 0 after being atomically incremented
	 * and tested.
	 * Finally, checking that "count" equals 3 verifies that all worker
	 * lcores performed "atomic inc and test" correctly.
	 */
	printf("inc and test\n");

	rte_atomic64_clear(&a64);
	rte_atomic32_clear(&a32);
	rte_atomic16_clear(&a16);
	rte_atomic32_clear(&synchro);
	rte_atomic64_clear(&count);

	rte_atomic64_set(&a64, (int64_t)(1 - (int64_t)rte_lcore_count()));
	rte_atomic32_set(&a32, (int32_t)(1 - (int32_t)rte_lcore_count()));
	rte_atomic16_set(&a16, (int16_t)(1 - (int16_t)rte_lcore_count()));
	rte_eal_mp_remote_launch(test_atomic_inc_and_test, NULL, SKIP_MAIN);
	rte_atomic32_set(&synchro, 1);
	rte_eal_mp_wait_lcore();
	rte_atomic32_clear(&synchro);

	if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) {
		printf("Atomic inc and test failed %d\n", (int)count.cnt);
		return -1;
	}

	/*
	 * Same as above, but this time we set the values to "number of worker
	 * lcores", and decrement instead of increment.
	 */
	printf("dec and test\n");

	rte_atomic32_clear(&synchro);
	rte_atomic64_clear(&count);

	rte_atomic64_set(&a64, (int64_t)(rte_lcore_count() - 1));
	rte_atomic32_set(&a32, (int32_t)(rte_lcore_count() - 1));
	rte_atomic16_set(&a16, (int16_t)(rte_lcore_count() - 1));
	rte_eal_mp_remote_launch(test_atomic_dec_and_test, NULL, SKIP_MAIN);
	rte_atomic32_set(&synchro, 1);
	rte_eal_mp_wait_lcore();
	rte_atomic32_clear(&synchro);

	if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) {
		printf("Atomic dec and test failed\n");
		return -1;
	}

#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
	/*
	 * This case tests the functionality of the rte_atomic128_cmp_exchange
	 * API. It calls rte_atomic128_cmp_exchange with four kinds of memory
	 * models successively on each worker core. Each successful 128-bit
	 * atomic compare-and-swap operation updates the global 128-bit
	 * counter by 2 for the first 64 bits and by 1 for the second 64 bits.
	 * Each worker core iterates this test N times.
	 * At the end of the test, verify that the first and second 64 bits of
	 * the 128-bit counter differ by the total number of iterations. If
	 * they do, the test passes.
	 */
	printf("128-bit compare and swap test\n");
	uint64_t iterations = 0;

	rte_atomic32_clear(&synchro);
	count128.val[0] = 0;
	count128.val[1] = 0;

	rte_eal_mp_remote_launch(test_atomic128_cmp_exchange, NULL,
				 SKIP_MAIN);
	rte_atomic32_set(&synchro, 1);
	rte_eal_mp_wait_lcore();
	rte_atomic32_clear(&synchro);

	iterations = count128.val[0] - count128.val[1];
	if (iterations != (uint64_t)4*N*(rte_lcore_count()-1)) {
		printf("128-bit compare and swap failed\n");
		return -1;
	}
#endif

	/*
	 * Test 16/32/64-bit atomic exchange.
	 */
	test64_t t;

	printf("exchange test\n");

	rte_atomic32_clear(&synchro);
	rte_atomic64_clear(&count);

	/* Generate the CRC8 lookup table */
	build_crc8_table();

	/* Create the initial tokens used by the test */
	t.u64 = rte_rand();
	token16 = (get_crc8(&t.u8[0], sizeof(token16) - 1) << 8)
		| (t.u16[0] & 0x00ff);
	token32 = ((uint32_t)get_crc8(&t.u8[0], sizeof(token32) - 1) << 24)
		| (t.u32[0] & 0x00ffffff);
	token64 = ((uint64_t)get_crc8(&t.u8[0], sizeof(token64) - 1) << 56)
		| (t.u64 & 0x00ffffffffffffff);

	rte_eal_mp_remote_launch(test_atomic_exchange, NULL, SKIP_MAIN);
	rte_atomic32_set(&synchro, 1);
	rte_eal_mp_wait_lcore();
	rte_atomic32_clear(&synchro);

	if (rte_atomic64_read(&count) > 0) {
		printf("Atomic exchange test failed\n");
		return -1;
	}

	return 0;
}
REGISTER_FAST_TEST(atomic_autotest, false, true, test_atomic);