1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 * Copyright(c) 2019 Arm Limited 4 */ 5 6 #include <stdio.h> 7 #include <stdint.h> 8 #include <unistd.h> 9 #include <inttypes.h> 10 #include <sys/queue.h> 11 12 #include <rte_memory.h> 13 #include <rte_per_lcore.h> 14 #include <rte_launch.h> 15 #include <rte_atomic.h> 16 #include <rte_eal.h> 17 #include <rte_lcore.h> 18 #include <rte_random.h> 19 #include <rte_hash_crc.h> 20 21 #include "test.h" 22 23 /* 24 * Atomic Variables 25 * ================ 26 * 27 * - The main test function performs several subtests. The first 28 * checks that the usual inc/dec/add/sub functions are working 29 * correctly: 30 * 31 * - Initialize 16-bit, 32-bit and 64-bit atomic variables to specific 32 * values. 33 * 34 * - These variables are incremented and decremented on each core at 35 * the same time in ``test_atomic_usual()``. 36 * 37 * - The function checks that once all lcores finish their function, 38 * the value of the atomic variables are still the same. 39 * 40 * - Test "test and set" functions. 41 * 42 * - Initialize 16-bit, 32-bit and 64-bit atomic variables to zero. 43 * 44 * - Invoke ``test_atomic_tas()`` on each lcore: before doing anything 45 * else. The cores are waiting a synchro using ``while 46 * (rte_atomic32_read(&val) == 0)`` which is triggered by the main test 47 * function. Then all cores do a 48 * ``rte_atomicXX_test_and_set()`` at the same time. If it is successful, 49 * it increments another atomic counter. 50 * 51 * - The main function checks that the atomic counter was incremented 52 * twice only (one for 16-bit, one for 32-bit and one for 64-bit values). 53 * 54 * - Test "add/sub and return" functions 55 * 56 * - Initialize 16-bit, 32-bit and 64-bit atomic variables to zero. 57 * 58 * - Invoke ``test_atomic_addsub_return()`` on each lcore. Before doing 59 * anything else, the cores are waiting a synchro. Each lcore does 60 * this operation several times:: 61 * 62 * tmp = rte_atomicXX_add_return(&a, 1); 63 * atomic_add(&count, tmp); 64 * tmp = rte_atomicXX_sub_return(&a, 1); 65 * atomic_sub(&count, tmp+1); 66 * 67 * - At the end of the test, the *count* value must be 0. 68 * 69 * - Test "128-bit compare and swap" (aarch64 and x86_64 only) 70 * 71 * - Initialize 128-bit atomic variables to zero. 72 * 73 * - Invoke ``test_atomic128_cmp_exchange()`` on each lcore. Before doing 74 * anything else, the cores are waiting a synchro. Each lcore does 75 * these compare and swap (CAS) operations several times:: 76 * 77 * Acquired CAS update counter.val[0] + 2; counter.val[1] + 1; 78 * Released CAS update counter.val[0] + 2; counter.val[1] + 1; 79 * Acquired_Released CAS update counter.val[0] + 2; counter.val[1] + 1; 80 * Relaxed CAS update counter.val[0] + 2; counter.val[1] + 1; 81 * 82 * - At the end of the test, the *count128* first 64-bit value and 83 * second 64-bit value differ by the total iterations. 84 * 85 * - Test "atomic exchange" functions 86 * 87 * - Create a 64 bit token that can be tested for data integrity 88 * 89 * - Invoke ``test_atomic_exchange`` on each lcore. Before doing 90 * anything else, the cores wait for a synchronization event. 91 * Each core then does the follwoing for N iterations: 92 * 93 * Generate a new token with a data integrity check 94 * Exchange the new token for previously generated token 95 * Increment a counter if a corrupt token was received 96 * 97 * - At the end of the test, the number of corrupted tokens must be 0. 98 */ 99 100 #define NUM_ATOMIC_TYPES 3 101 102 #define N 1000000 103 104 static rte_atomic16_t a16; 105 static rte_atomic32_t a32; 106 static rte_atomic64_t a64; 107 static rte_atomic64_t count; 108 static rte_atomic32_t synchro; 109 110 static int 111 test_atomic_usual(__rte_unused void *arg) 112 { 113 unsigned i; 114 115 while (rte_atomic32_read(&synchro) == 0) 116 ; 117 118 for (i = 0; i < N; i++) 119 rte_atomic16_inc(&a16); 120 for (i = 0; i < N; i++) 121 rte_atomic16_dec(&a16); 122 for (i = 0; i < (N / 5); i++) 123 rte_atomic16_add(&a16, 5); 124 for (i = 0; i < (N / 5); i++) 125 rte_atomic16_sub(&a16, 5); 126 127 for (i = 0; i < N; i++) 128 rte_atomic32_inc(&a32); 129 for (i = 0; i < N; i++) 130 rte_atomic32_dec(&a32); 131 for (i = 0; i < (N / 5); i++) 132 rte_atomic32_add(&a32, 5); 133 for (i = 0; i < (N / 5); i++) 134 rte_atomic32_sub(&a32, 5); 135 136 for (i = 0; i < N; i++) 137 rte_atomic64_inc(&a64); 138 for (i = 0; i < N; i++) 139 rte_atomic64_dec(&a64); 140 for (i = 0; i < (N / 5); i++) 141 rte_atomic64_add(&a64, 5); 142 for (i = 0; i < (N / 5); i++) 143 rte_atomic64_sub(&a64, 5); 144 145 return 0; 146 } 147 148 static int 149 test_atomic_tas(__rte_unused void *arg) 150 { 151 while (rte_atomic32_read(&synchro) == 0) 152 ; 153 154 if (rte_atomic16_test_and_set(&a16)) 155 rte_atomic64_inc(&count); 156 if (rte_atomic32_test_and_set(&a32)) 157 rte_atomic64_inc(&count); 158 if (rte_atomic64_test_and_set(&a64)) 159 rte_atomic64_inc(&count); 160 161 return 0; 162 } 163 164 static int 165 test_atomic_addsub_and_return(__rte_unused void *arg) 166 { 167 uint32_t tmp16; 168 uint32_t tmp32; 169 uint64_t tmp64; 170 unsigned i; 171 172 while (rte_atomic32_read(&synchro) == 0) 173 ; 174 175 for (i = 0; i < N; i++) { 176 tmp16 = rte_atomic16_add_return(&a16, 1); 177 rte_atomic64_add(&count, tmp16); 178 179 tmp16 = rte_atomic16_sub_return(&a16, 1); 180 rte_atomic64_sub(&count, tmp16+1); 181 182 tmp32 = rte_atomic32_add_return(&a32, 1); 183 rte_atomic64_add(&count, tmp32); 184 185 tmp32 = rte_atomic32_sub_return(&a32, 1); 186 rte_atomic64_sub(&count, tmp32+1); 187 188 tmp64 = rte_atomic64_add_return(&a64, 1); 189 rte_atomic64_add(&count, tmp64); 190 191 tmp64 = rte_atomic64_sub_return(&a64, 1); 192 rte_atomic64_sub(&count, tmp64+1); 193 } 194 195 return 0; 196 } 197 198 /* 199 * rte_atomic32_inc_and_test() would increase a 32 bits counter by one and then 200 * test if that counter is equal to 0. It would return true if the counter is 0 201 * and false if the counter is not 0. rte_atomic64_inc_and_test() could do the 202 * same thing but for a 64 bits counter. 203 * Here checks that if the 32/64 bits counter is equal to 0 after being atomically 204 * increased by one. If it is, increase the variable of "count" by one which would 205 * be checked as the result later. 206 * 207 */ 208 static int 209 test_atomic_inc_and_test(__rte_unused void *arg) 210 { 211 while (rte_atomic32_read(&synchro) == 0) 212 ; 213 214 if (rte_atomic16_inc_and_test(&a16)) { 215 rte_atomic64_inc(&count); 216 } 217 if (rte_atomic32_inc_and_test(&a32)) { 218 rte_atomic64_inc(&count); 219 } 220 if (rte_atomic64_inc_and_test(&a64)) { 221 rte_atomic64_inc(&count); 222 } 223 224 return 0; 225 } 226 227 /* 228 * rte_atomicXX_dec_and_test() should decrease a 32 bits counter by one and then 229 * test if that counter is equal to 0. It should return true if the counter is 0 230 * and false if the counter is not 0. 231 * This test checks if the counter is equal to 0 after being atomically 232 * decreased by one. If it is, increase the value of "count" by one which is to 233 * be checked as the result later. 234 */ 235 static int 236 test_atomic_dec_and_test(__rte_unused void *arg) 237 { 238 while (rte_atomic32_read(&synchro) == 0) 239 ; 240 241 if (rte_atomic16_dec_and_test(&a16)) 242 rte_atomic64_inc(&count); 243 244 if (rte_atomic32_dec_and_test(&a32)) 245 rte_atomic64_inc(&count); 246 247 if (rte_atomic64_dec_and_test(&a64)) 248 rte_atomic64_inc(&count); 249 250 return 0; 251 } 252 253 #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64) 254 static rte_int128_t count128; 255 256 /* 257 * rte_atomic128_cmp_exchange() should update a 128 bits counter's first 64 258 * bits by 2 and the second 64 bits by 1 in this test. It should return true 259 * if the compare exchange operation is successful. 260 * This test repeats 128 bits compare and swap operations N rounds. In each 261 * iteration it runs compare and swap operation with different memory models. 262 */ 263 static int 264 test_atomic128_cmp_exchange(__rte_unused void *arg) 265 { 266 rte_int128_t expected; 267 int success; 268 unsigned int i; 269 270 while (rte_atomic32_read(&synchro) == 0) 271 ; 272 273 expected = count128; 274 275 for (i = 0; i < N; i++) { 276 do { 277 rte_int128_t desired; 278 279 desired.val[0] = expected.val[0] + 2; 280 desired.val[1] = expected.val[1] + 1; 281 282 success = rte_atomic128_cmp_exchange(&count128, 283 &expected, &desired, 1, 284 __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); 285 } while (success == 0); 286 287 do { 288 rte_int128_t desired; 289 290 desired.val[0] = expected.val[0] + 2; 291 desired.val[1] = expected.val[1] + 1; 292 293 success = rte_atomic128_cmp_exchange(&count128, 294 &expected, &desired, 1, 295 __ATOMIC_RELEASE, __ATOMIC_RELAXED); 296 } while (success == 0); 297 298 do { 299 rte_int128_t desired; 300 301 desired.val[0] = expected.val[0] + 2; 302 desired.val[1] = expected.val[1] + 1; 303 304 success = rte_atomic128_cmp_exchange(&count128, 305 &expected, &desired, 1, 306 __ATOMIC_ACQ_REL, __ATOMIC_RELAXED); 307 } while (success == 0); 308 309 do { 310 rte_int128_t desired; 311 312 desired.val[0] = expected.val[0] + 2; 313 desired.val[1] = expected.val[1] + 1; 314 315 success = rte_atomic128_cmp_exchange(&count128, 316 &expected, &desired, 1, 317 __ATOMIC_RELAXED, __ATOMIC_RELAXED); 318 } while (success == 0); 319 } 320 321 return 0; 322 } 323 #endif 324 325 /* 326 * Helper definitions/variables/functions for 327 * atomic exchange tests 328 */ 329 typedef union { 330 uint16_t u16; 331 uint8_t u8[2]; 332 } test16_t; 333 334 typedef union { 335 uint32_t u32; 336 uint16_t u16[2]; 337 uint8_t u8[4]; 338 } test32_t; 339 340 typedef union { 341 uint64_t u64; 342 uint32_t u32[2]; 343 uint16_t u16[4]; 344 uint8_t u8[8]; 345 } test64_t; 346 347 const uint8_t CRC8_POLY = 0x91; 348 uint8_t crc8_table[256]; 349 350 volatile uint16_t token16; 351 volatile uint32_t token32; 352 volatile uint64_t token64; 353 354 static void 355 build_crc8_table(void) 356 { 357 uint8_t val; 358 int i, j; 359 360 for (i = 0; i < 256; i++) { 361 val = i; 362 for (j = 0; j < 8; j++) { 363 if (val & 1) 364 val ^= CRC8_POLY; 365 val >>= 1; 366 } 367 crc8_table[i] = val; 368 } 369 } 370 371 static uint8_t 372 get_crc8(uint8_t *message, int length) 373 { 374 uint8_t crc = 0; 375 int i; 376 377 for (i = 0; i < length; i++) 378 crc = crc8_table[crc ^ message[i]]; 379 return crc; 380 } 381 382 /* 383 * The atomic exchange test sets up a token in memory and 384 * then spins up multiple lcores whose job is to generate 385 * new tokens, exchange that new token for the old one held 386 * in memory, and then verify that the old token is still 387 * valid (i.e. the exchange did not corrupt the token). 388 * 389 * A token is made up of random data and 8 bits of crc 390 * covering that random data. The following is an example 391 * of a 64bit token. 392 * 393 * +------------+------------+ 394 * | 63 56 | 55 0 | 395 * +------------+------------+ 396 * | CRC8 | Data | 397 * +------------+------------+ 398 */ 399 static int 400 test_atomic_exchange(__rte_unused void *arg) 401 { 402 int i; 403 test16_t nt16, ot16; /* new token, old token */ 404 test32_t nt32, ot32; 405 test64_t nt64, ot64; 406 407 /* Wait until all of the other threads have been dispatched */ 408 while (rte_atomic32_read(&synchro) == 0) 409 ; 410 411 /* 412 * Let the battle begin! Every thread attempts to steal the current 413 * token with an atomic exchange operation and install its own newly 414 * generated token. If the old token is valid (i.e. it has the 415 * appropriate crc32 hash for the data) then the test iteration has 416 * passed. If the token is invalid, increment the counter. 417 */ 418 for (i = 0; i < N; i++) { 419 420 /* Test 64bit Atomic Exchange */ 421 nt64.u64 = rte_rand(); 422 nt64.u8[7] = get_crc8(&nt64.u8[0], sizeof(nt64) - 1); 423 ot64.u64 = rte_atomic64_exchange(&token64, nt64.u64); 424 if (ot64.u8[7] != get_crc8(&ot64.u8[0], sizeof(ot64) - 1)) 425 rte_atomic64_inc(&count); 426 427 /* Test 32bit Atomic Exchange */ 428 nt32.u32 = (uint32_t)rte_rand(); 429 nt32.u8[3] = get_crc8(&nt32.u8[0], sizeof(nt32) - 1); 430 ot32.u32 = rte_atomic32_exchange(&token32, nt32.u32); 431 if (ot32.u8[3] != get_crc8(&ot32.u8[0], sizeof(ot32) - 1)) 432 rte_atomic64_inc(&count); 433 434 /* Test 16bit Atomic Exchange */ 435 nt16.u16 = (uint16_t)rte_rand(); 436 nt16.u8[1] = get_crc8(&nt16.u8[0], sizeof(nt16) - 1); 437 ot16.u16 = rte_atomic16_exchange(&token16, nt16.u16); 438 if (ot16.u8[1] != get_crc8(&ot16.u8[0], sizeof(ot16) - 1)) 439 rte_atomic64_inc(&count); 440 } 441 442 return 0; 443 } 444 static int 445 test_atomic(void) 446 { 447 rte_atomic16_init(&a16); 448 rte_atomic32_init(&a32); 449 rte_atomic64_init(&a64); 450 rte_atomic64_init(&count); 451 rte_atomic32_init(&synchro); 452 453 rte_atomic16_set(&a16, 1UL << 10); 454 rte_atomic32_set(&a32, 1UL << 10); 455 rte_atomic64_set(&a64, 1ULL << 33); 456 457 printf("usual inc/dec/add/sub functions\n"); 458 459 rte_eal_mp_remote_launch(test_atomic_usual, NULL, SKIP_MAIN); 460 rte_atomic32_set(&synchro, 1); 461 rte_eal_mp_wait_lcore(); 462 rte_atomic32_set(&synchro, 0); 463 464 if (rte_atomic16_read(&a16) != 1UL << 10) { 465 printf("Atomic16 usual functions failed\n"); 466 return -1; 467 } 468 469 if (rte_atomic32_read(&a32) != 1UL << 10) { 470 printf("Atomic32 usual functions failed\n"); 471 return -1; 472 } 473 474 if (rte_atomic64_read(&a64) != 1ULL << 33) { 475 printf("Atomic64 usual functions failed\n"); 476 return -1; 477 } 478 479 printf("test and set\n"); 480 481 rte_atomic64_set(&a64, 0); 482 rte_atomic32_set(&a32, 0); 483 rte_atomic16_set(&a16, 0); 484 rte_atomic64_set(&count, 0); 485 rte_eal_mp_remote_launch(test_atomic_tas, NULL, SKIP_MAIN); 486 rte_atomic32_set(&synchro, 1); 487 rte_eal_mp_wait_lcore(); 488 rte_atomic32_set(&synchro, 0); 489 490 if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) { 491 printf("Atomic test and set failed\n"); 492 return -1; 493 } 494 495 printf("add/sub and return\n"); 496 497 rte_atomic64_set(&a64, 0); 498 rte_atomic32_set(&a32, 0); 499 rte_atomic16_set(&a16, 0); 500 rte_atomic64_set(&count, 0); 501 rte_eal_mp_remote_launch(test_atomic_addsub_and_return, NULL, 502 SKIP_MAIN); 503 rte_atomic32_set(&synchro, 1); 504 rte_eal_mp_wait_lcore(); 505 rte_atomic32_set(&synchro, 0); 506 507 if (rte_atomic64_read(&count) != 0) { 508 printf("Atomic add/sub+return failed\n"); 509 return -1; 510 } 511 512 /* 513 * Set a64, a32 and a16 with the same value of minus "number of worker 514 * lcores", launch all worker lcores to atomically increase by one and 515 * test them respectively. 516 * Each lcore should have only one chance to increase a64 by one and 517 * then check if it is equal to 0, but there should be only one lcore 518 * that finds that it is 0. It is similar for a32 and a16. 519 * Then a variable of "count", initialized to zero, is increased by 520 * one if a64, a32 or a16 is 0 after being increased and tested 521 * atomically. 522 * We can check if "count" is finally equal to 3 to see if all worker 523 * lcores performed "atomic inc and test" right. 524 */ 525 printf("inc and test\n"); 526 527 rte_atomic64_clear(&a64); 528 rte_atomic32_clear(&a32); 529 rte_atomic16_clear(&a16); 530 rte_atomic32_clear(&synchro); 531 rte_atomic64_clear(&count); 532 533 rte_atomic64_set(&a64, (int64_t)(1 - (int64_t)rte_lcore_count())); 534 rte_atomic32_set(&a32, (int32_t)(1 - (int32_t)rte_lcore_count())); 535 rte_atomic16_set(&a16, (int16_t)(1 - (int16_t)rte_lcore_count())); 536 rte_eal_mp_remote_launch(test_atomic_inc_and_test, NULL, SKIP_MAIN); 537 rte_atomic32_set(&synchro, 1); 538 rte_eal_mp_wait_lcore(); 539 rte_atomic32_clear(&synchro); 540 541 if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) { 542 printf("Atomic inc and test failed %d\n", (int)count.cnt); 543 return -1; 544 } 545 546 /* 547 * Same as above, but this time we set the values to "number of worker 548 * lcores", and decrement instead of increment. 549 */ 550 printf("dec and test\n"); 551 552 rte_atomic32_clear(&synchro); 553 rte_atomic64_clear(&count); 554 555 rte_atomic64_set(&a64, (int64_t)(rte_lcore_count() - 1)); 556 rte_atomic32_set(&a32, (int32_t)(rte_lcore_count() - 1)); 557 rte_atomic16_set(&a16, (int16_t)(rte_lcore_count() - 1)); 558 rte_eal_mp_remote_launch(test_atomic_dec_and_test, NULL, SKIP_MAIN); 559 rte_atomic32_set(&synchro, 1); 560 rte_eal_mp_wait_lcore(); 561 rte_atomic32_clear(&synchro); 562 563 if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) { 564 printf("Atomic dec and test failed\n"); 565 return -1; 566 } 567 568 #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64) 569 /* 570 * This case tests the functionality of rte_atomic128_cmp_exchange 571 * API. It calls rte_atomic128_cmp_exchange with four kinds of memory 572 * models successively on each worker core. Once each 128-bit atomic 573 * compare and swap operation is successful, it updates the global 574 * 128-bit counter by 2 for the first 64-bit and 1 for the second 575 * 64-bit. Each worker core iterates this test N times. 576 * At the end of test, verify whether the first 64-bits of the 128-bit 577 * counter and the second 64bits is differ by the total iterations. If 578 * it is, the test passes. 579 */ 580 printf("128-bit compare and swap test\n"); 581 uint64_t iterations = 0; 582 583 rte_atomic32_clear(&synchro); 584 count128.val[0] = 0; 585 count128.val[1] = 0; 586 587 rte_eal_mp_remote_launch(test_atomic128_cmp_exchange, NULL, 588 SKIP_MAIN); 589 rte_atomic32_set(&synchro, 1); 590 rte_eal_mp_wait_lcore(); 591 rte_atomic32_clear(&synchro); 592 593 iterations = count128.val[0] - count128.val[1]; 594 if (iterations != 4*N*(rte_lcore_count()-1)) { 595 printf("128-bit compare and swap failed\n"); 596 return -1; 597 } 598 #endif 599 600 /* 601 * Test 16/32/64bit atomic exchange. 602 */ 603 test64_t t; 604 605 printf("exchange test\n"); 606 607 rte_atomic32_clear(&synchro); 608 rte_atomic64_clear(&count); 609 610 /* Generate the CRC8 lookup table */ 611 build_crc8_table(); 612 613 /* Create the initial tokens used by the test */ 614 t.u64 = rte_rand(); 615 token16 = (get_crc8(&t.u8[0], sizeof(token16) - 1) << 8) 616 | (t.u16[0] & 0x00ff); 617 token32 = ((uint32_t)get_crc8(&t.u8[0], sizeof(token32) - 1) << 24) 618 | (t.u32[0] & 0x00ffffff); 619 token64 = ((uint64_t)get_crc8(&t.u8[0], sizeof(token64) - 1) << 56) 620 | (t.u64 & 0x00ffffffffffffff); 621 622 rte_eal_mp_remote_launch(test_atomic_exchange, NULL, SKIP_MAIN); 623 rte_atomic32_set(&synchro, 1); 624 rte_eal_mp_wait_lcore(); 625 rte_atomic32_clear(&synchro); 626 627 if (rte_atomic64_read(&count) > 0) { 628 printf("Atomic exchange test failed\n"); 629 return -1; 630 } 631 632 return 0; 633 } 634 REGISTER_TEST_COMMAND(atomic_autotest, test_atomic); 635