1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2018 Arm Limited 3 */ 4 5 #include <stdio.h> 6 #include <stdbool.h> 7 #include <inttypes.h> 8 #include <rte_pause.h> 9 #include <rte_rcu_qsbr.h> 10 #include <rte_hash.h> 11 #include <rte_hash_crc.h> 12 #include <rte_malloc.h> 13 #include <rte_cycles.h> 14 #include <unistd.h> 15 16 #include "test.h" 17 18 /* Check condition and return an error if true. */ 19 static uint16_t enabled_core_ids[RTE_MAX_LCORE]; 20 static unsigned int num_cores; 21 22 static uint32_t *keys; 23 #define TOTAL_ENTRY (1024 * 8) 24 #define COUNTER_VALUE 4096 25 static uint32_t *hash_data[TOTAL_ENTRY]; 26 static volatile uint8_t writer_done; 27 static volatile uint8_t all_registered; 28 static volatile uint32_t thr_id; 29 30 static struct rte_rcu_qsbr *t[RTE_MAX_LCORE]; 31 static struct rte_hash *h; 32 static char hash_name[8]; 33 static rte_atomic64_t updates, checks; 34 static rte_atomic64_t update_cycles, check_cycles; 35 36 /* Scale down results to 1000 operations to support lower 37 * granularity clocks. 38 */ 39 #define RCU_SCALE_DOWN 1000 40 41 /* Simple way to allocate thread ids in 0 to RTE_MAX_LCORE space */ 42 static inline uint32_t 43 alloc_thread_id(void) 44 { 45 uint32_t tmp_thr_id; 46 47 tmp_thr_id = __atomic_fetch_add(&thr_id, 1, __ATOMIC_RELAXED); 48 if (tmp_thr_id >= RTE_MAX_LCORE) 49 printf("Invalid thread id %u\n", tmp_thr_id); 50 51 return tmp_thr_id; 52 } 53 54 static int 55 test_rcu_qsbr_reader_perf(void *arg) 56 { 57 bool writer_present = (bool)arg; 58 uint32_t thread_id = alloc_thread_id(); 59 uint64_t loop_cnt = 0; 60 uint64_t begin, cycles; 61 62 /* Register for report QS */ 63 rte_rcu_qsbr_thread_register(t[0], thread_id); 64 /* Make the thread online */ 65 rte_rcu_qsbr_thread_online(t[0], thread_id); 66 67 begin = rte_rdtsc_precise(); 68 69 if (writer_present) { 70 while (!writer_done) { 71 /* Update quiescent state counter */ 72 rte_rcu_qsbr_quiescent(t[0], thread_id); 73 loop_cnt++; 74 } 75 } else { 76 while (loop_cnt < 100000000) { 77 /* Update quiescent state counter */ 78 rte_rcu_qsbr_quiescent(t[0], thread_id); 79 loop_cnt++; 80 } 81 } 82 83 cycles = rte_rdtsc_precise() - begin; 84 rte_atomic64_add(&update_cycles, cycles); 85 rte_atomic64_add(&updates, loop_cnt); 86 87 /* Make the thread offline */ 88 rte_rcu_qsbr_thread_offline(t[0], thread_id); 89 /* Unregister before exiting to avoid writer from waiting */ 90 rte_rcu_qsbr_thread_unregister(t[0], thread_id); 91 92 return 0; 93 } 94 95 static int 96 test_rcu_qsbr_writer_perf(void *arg) 97 { 98 bool wait = (bool)arg; 99 uint64_t token = 0; 100 uint64_t loop_cnt = 0; 101 uint64_t begin, cycles; 102 103 begin = rte_rdtsc_precise(); 104 105 do { 106 /* Start the quiescent state query process */ 107 if (wait) 108 token = rte_rcu_qsbr_start(t[0]); 109 110 /* Check quiescent state status */ 111 rte_rcu_qsbr_check(t[0], token, wait); 112 loop_cnt++; 113 } while (loop_cnt < 20000000); 114 115 cycles = rte_rdtsc_precise() - begin; 116 rte_atomic64_add(&check_cycles, cycles); 117 rte_atomic64_add(&checks, loop_cnt); 118 return 0; 119 } 120 121 /* 122 * Perf test: Reader/writer 123 * Single writer, Multiple Readers, Single QS var, Non-Blocking rcu_qsbr_check 124 */ 125 static int 126 test_rcu_qsbr_perf(void) 127 { 128 size_t sz; 129 unsigned int i, tmp_num_cores; 130 131 writer_done = 0; 132 133 rte_atomic64_clear(&updates); 134 rte_atomic64_clear(&update_cycles); 135 rte_atomic64_clear(&checks); 136 rte_atomic64_clear(&check_cycles); 137 138 printf("\nPerf Test: %d Readers/1 Writer('wait' in qsbr_check == true)\n", 139 num_cores - 1); 140 141 __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); 142 143 if (all_registered == 1) 144 tmp_num_cores = num_cores - 1; 145 else 146 tmp_num_cores = RTE_MAX_LCORE; 147 148 sz = rte_rcu_qsbr_get_memsize(tmp_num_cores); 149 t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, 150 RTE_CACHE_LINE_SIZE); 151 /* QS variable is initialized */ 152 rte_rcu_qsbr_init(t[0], tmp_num_cores); 153 154 /* Reader threads are launched */ 155 for (i = 0; i < num_cores - 1; i++) 156 rte_eal_remote_launch(test_rcu_qsbr_reader_perf, (void *)1, 157 enabled_core_ids[i]); 158 159 /* Writer thread is launched */ 160 rte_eal_remote_launch(test_rcu_qsbr_writer_perf, 161 (void *)1, enabled_core_ids[i]); 162 163 /* Wait for the writer thread */ 164 rte_eal_wait_lcore(enabled_core_ids[i]); 165 writer_done = 1; 166 167 /* Wait until all readers have exited */ 168 rte_eal_mp_wait_lcore(); 169 170 printf("Total quiescent state updates = %"PRIi64"\n", 171 rte_atomic64_read(&updates)); 172 printf("Cycles per %d quiescent state updates: %"PRIi64"\n", 173 RCU_SCALE_DOWN, 174 rte_atomic64_read(&update_cycles) / 175 (rte_atomic64_read(&updates) / RCU_SCALE_DOWN)); 176 printf("Total RCU checks = %"PRIi64"\n", rte_atomic64_read(&checks)); 177 printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN, 178 rte_atomic64_read(&check_cycles) / 179 (rte_atomic64_read(&checks) / RCU_SCALE_DOWN)); 180 181 rte_free(t[0]); 182 183 return 0; 184 } 185 186 /* 187 * Perf test: Readers 188 * Single writer, Multiple readers, Single QS variable 189 */ 190 static int 191 test_rcu_qsbr_rperf(void) 192 { 193 size_t sz; 194 unsigned int i, tmp_num_cores; 195 196 rte_atomic64_clear(&updates); 197 rte_atomic64_clear(&update_cycles); 198 199 __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); 200 201 printf("\nPerf Test: %d Readers\n", num_cores); 202 203 if (all_registered == 1) 204 tmp_num_cores = num_cores; 205 else 206 tmp_num_cores = RTE_MAX_LCORE; 207 208 sz = rte_rcu_qsbr_get_memsize(tmp_num_cores); 209 t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, 210 RTE_CACHE_LINE_SIZE); 211 /* QS variable is initialized */ 212 rte_rcu_qsbr_init(t[0], tmp_num_cores); 213 214 /* Reader threads are launched */ 215 for (i = 0; i < num_cores; i++) 216 rte_eal_remote_launch(test_rcu_qsbr_reader_perf, NULL, 217 enabled_core_ids[i]); 218 219 /* Wait until all readers have exited */ 220 rte_eal_mp_wait_lcore(); 221 222 printf("Total quiescent state updates = %"PRIi64"\n", 223 rte_atomic64_read(&updates)); 224 printf("Cycles per %d quiescent state updates: %"PRIi64"\n", 225 RCU_SCALE_DOWN, 226 rte_atomic64_read(&update_cycles) / 227 (rte_atomic64_read(&updates) / RCU_SCALE_DOWN)); 228 229 rte_free(t[0]); 230 231 return 0; 232 } 233 234 /* 235 * Perf test: 236 * Multiple writer, Single QS variable, Non-blocking rcu_qsbr_check 237 */ 238 static int 239 test_rcu_qsbr_wperf(void) 240 { 241 size_t sz; 242 unsigned int i; 243 244 rte_atomic64_clear(&checks); 245 rte_atomic64_clear(&check_cycles); 246 247 __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); 248 249 printf("\nPerf test: %d Writers ('wait' in qsbr_check == false)\n", 250 num_cores); 251 252 /* Number of readers does not matter for QS variable in this test 253 * case as no reader will be registered. 254 */ 255 sz = rte_rcu_qsbr_get_memsize(RTE_MAX_LCORE); 256 t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, 257 RTE_CACHE_LINE_SIZE); 258 /* QS variable is initialized */ 259 rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE); 260 261 /* Writer threads are launched */ 262 for (i = 0; i < num_cores; i++) 263 rte_eal_remote_launch(test_rcu_qsbr_writer_perf, 264 (void *)0, enabled_core_ids[i]); 265 266 /* Wait until all readers have exited */ 267 rte_eal_mp_wait_lcore(); 268 269 printf("Total RCU checks = %"PRIi64"\n", rte_atomic64_read(&checks)); 270 printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN, 271 rte_atomic64_read(&check_cycles) / 272 (rte_atomic64_read(&checks) / RCU_SCALE_DOWN)); 273 274 rte_free(t[0]); 275 276 return 0; 277 } 278 279 /* 280 * RCU test cases using rte_hash data structure. 281 */ 282 static int 283 test_rcu_qsbr_hash_reader(void *arg) 284 { 285 struct rte_rcu_qsbr *temp; 286 struct rte_hash *hash = NULL; 287 int i; 288 uint64_t loop_cnt = 0; 289 uint64_t begin, cycles; 290 uint32_t thread_id = alloc_thread_id(); 291 uint8_t read_type = (uint8_t)((uintptr_t)arg); 292 uint32_t *pdata; 293 294 temp = t[read_type]; 295 hash = h; 296 297 rte_rcu_qsbr_thread_register(temp, thread_id); 298 299 begin = rte_rdtsc_precise(); 300 301 do { 302 rte_rcu_qsbr_thread_online(temp, thread_id); 303 for (i = 0; i < TOTAL_ENTRY; i++) { 304 rte_rcu_qsbr_lock(temp, thread_id); 305 if (rte_hash_lookup_data(hash, keys + i, 306 (void **)&pdata) != -ENOENT) { 307 pdata[thread_id] = 0; 308 while (pdata[thread_id] < COUNTER_VALUE) 309 pdata[thread_id]++; 310 } 311 rte_rcu_qsbr_unlock(temp, thread_id); 312 } 313 /* Update quiescent state counter */ 314 rte_rcu_qsbr_quiescent(temp, thread_id); 315 rte_rcu_qsbr_thread_offline(temp, thread_id); 316 loop_cnt++; 317 } while (!writer_done); 318 319 cycles = rte_rdtsc_precise() - begin; 320 rte_atomic64_add(&update_cycles, cycles); 321 rte_atomic64_add(&updates, loop_cnt); 322 323 rte_rcu_qsbr_thread_unregister(temp, thread_id); 324 325 return 0; 326 } 327 328 static struct rte_hash *init_hash(void) 329 { 330 int i; 331 struct rte_hash *hash = NULL; 332 333 snprintf(hash_name, 8, "hash"); 334 struct rte_hash_parameters hash_params = { 335 .entries = TOTAL_ENTRY, 336 .key_len = sizeof(uint32_t), 337 .hash_func_init_val = 0, 338 .socket_id = rte_socket_id(), 339 .hash_func = rte_hash_crc, 340 .extra_flag = 341 RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF, 342 .name = hash_name, 343 }; 344 345 hash = rte_hash_create(&hash_params); 346 if (hash == NULL) { 347 printf("Hash create Failed\n"); 348 return NULL; 349 } 350 351 for (i = 0; i < TOTAL_ENTRY; i++) { 352 hash_data[i] = rte_zmalloc(NULL, 353 sizeof(uint32_t) * RTE_MAX_LCORE, 0); 354 if (hash_data[i] == NULL) { 355 printf("No memory\n"); 356 return NULL; 357 } 358 } 359 keys = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0); 360 if (keys == NULL) { 361 printf("No memory\n"); 362 return NULL; 363 } 364 365 for (i = 0; i < TOTAL_ENTRY; i++) 366 keys[i] = i; 367 368 for (i = 0; i < TOTAL_ENTRY; i++) { 369 if (rte_hash_add_key_data(hash, keys + i, 370 (void *)((uintptr_t)hash_data[i])) < 0) { 371 printf("Hash key add Failed #%d\n", i); 372 return NULL; 373 } 374 } 375 return hash; 376 } 377 378 /* 379 * Functional test: 380 * Single writer, Single QS variable Single QSBR query, Blocking rcu_qsbr_check 381 */ 382 static int 383 test_rcu_qsbr_sw_sv_1qs(void) 384 { 385 uint64_t token, begin, cycles; 386 size_t sz; 387 unsigned int i, j, tmp_num_cores; 388 int32_t pos; 389 390 writer_done = 0; 391 392 rte_atomic64_clear(&updates); 393 rte_atomic64_clear(&update_cycles); 394 rte_atomic64_clear(&checks); 395 rte_atomic64_clear(&check_cycles); 396 397 __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); 398 399 printf("\nPerf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, Blocking QSBR Check\n", num_cores); 400 401 if (all_registered == 1) 402 tmp_num_cores = num_cores; 403 else 404 tmp_num_cores = RTE_MAX_LCORE; 405 406 sz = rte_rcu_qsbr_get_memsize(tmp_num_cores); 407 t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, 408 RTE_CACHE_LINE_SIZE); 409 /* QS variable is initialized */ 410 rte_rcu_qsbr_init(t[0], tmp_num_cores); 411 412 /* Shared data structure created */ 413 h = init_hash(); 414 if (h == NULL) { 415 printf("Hash init failed\n"); 416 goto error; 417 } 418 419 /* Reader threads are launched */ 420 for (i = 0; i < num_cores; i++) 421 rte_eal_remote_launch(test_rcu_qsbr_hash_reader, NULL, 422 enabled_core_ids[i]); 423 424 begin = rte_rdtsc_precise(); 425 426 for (i = 0; i < TOTAL_ENTRY; i++) { 427 /* Delete elements from the shared data structure */ 428 pos = rte_hash_del_key(h, keys + i); 429 if (pos < 0) { 430 printf("Delete key failed #%d\n", keys[i]); 431 goto error; 432 } 433 /* Start the quiescent state query process */ 434 token = rte_rcu_qsbr_start(t[0]); 435 436 /* Check the quiescent state status */ 437 rte_rcu_qsbr_check(t[0], token, true); 438 for (j = 0; j < tmp_num_cores; j++) { 439 if (hash_data[i][j] != COUNTER_VALUE && 440 hash_data[i][j] != 0) { 441 printf("Reader thread ID %u did not complete #%d = %d\n", 442 j, i, hash_data[i][j]); 443 goto error; 444 } 445 } 446 447 if (rte_hash_free_key_with_position(h, pos) < 0) { 448 printf("Failed to free the key #%d\n", keys[i]); 449 goto error; 450 } 451 rte_free(hash_data[i]); 452 hash_data[i] = NULL; 453 } 454 455 cycles = rte_rdtsc_precise() - begin; 456 rte_atomic64_add(&check_cycles, cycles); 457 rte_atomic64_add(&checks, i); 458 459 writer_done = 1; 460 461 /* Wait and check return value from reader threads */ 462 for (i = 0; i < num_cores; i++) 463 if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) 464 goto error; 465 rte_hash_free(h); 466 rte_free(keys); 467 468 printf("Following numbers include calls to rte_hash functions\n"); 469 printf("Cycles per 1 quiescent state update(online/update/offline): %"PRIi64"\n", 470 rte_atomic64_read(&update_cycles) / 471 rte_atomic64_read(&updates)); 472 473 printf("Cycles per 1 check(start, check): %"PRIi64"\n\n", 474 rte_atomic64_read(&check_cycles) / 475 rte_atomic64_read(&checks)); 476 477 rte_free(t[0]); 478 479 return 0; 480 481 error: 482 writer_done = 1; 483 /* Wait until all readers have exited */ 484 rte_eal_mp_wait_lcore(); 485 486 rte_hash_free(h); 487 rte_free(keys); 488 for (i = 0; i < TOTAL_ENTRY; i++) 489 rte_free(hash_data[i]); 490 491 rte_free(t[0]); 492 493 return -1; 494 } 495 496 /* 497 * Functional test: 498 * Single writer, Single QS variable, Single QSBR query, 499 * Non-blocking rcu_qsbr_check 500 */ 501 static int 502 test_rcu_qsbr_sw_sv_1qs_non_blocking(void) 503 { 504 uint64_t token, begin, cycles; 505 int ret; 506 size_t sz; 507 unsigned int i, j, tmp_num_cores; 508 int32_t pos; 509 510 writer_done = 0; 511 512 printf("Perf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, Non-Blocking QSBR check\n", num_cores); 513 514 __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); 515 516 if (all_registered == 1) 517 tmp_num_cores = num_cores; 518 else 519 tmp_num_cores = RTE_MAX_LCORE; 520 521 sz = rte_rcu_qsbr_get_memsize(tmp_num_cores); 522 t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, 523 RTE_CACHE_LINE_SIZE); 524 /* QS variable is initialized */ 525 rte_rcu_qsbr_init(t[0], tmp_num_cores); 526 527 /* Shared data structure created */ 528 h = init_hash(); 529 if (h == NULL) { 530 printf("Hash init failed\n"); 531 goto error; 532 } 533 534 /* Reader threads are launched */ 535 for (i = 0; i < num_cores; i++) 536 rte_eal_remote_launch(test_rcu_qsbr_hash_reader, NULL, 537 enabled_core_ids[i]); 538 539 begin = rte_rdtsc_precise(); 540 541 for (i = 0; i < TOTAL_ENTRY; i++) { 542 /* Delete elements from the shared data structure */ 543 pos = rte_hash_del_key(h, keys + i); 544 if (pos < 0) { 545 printf("Delete key failed #%d\n", keys[i]); 546 goto error; 547 } 548 /* Start the quiescent state query process */ 549 token = rte_rcu_qsbr_start(t[0]); 550 551 /* Check the quiescent state status */ 552 do { 553 ret = rte_rcu_qsbr_check(t[0], token, false); 554 } while (ret == 0); 555 for (j = 0; j < tmp_num_cores; j++) { 556 if (hash_data[i][j] != COUNTER_VALUE && 557 hash_data[i][j] != 0) { 558 printf("Reader thread ID %u did not complete #%d = %d\n", 559 j, i, hash_data[i][j]); 560 goto error; 561 } 562 } 563 564 if (rte_hash_free_key_with_position(h, pos) < 0) { 565 printf("Failed to free the key #%d\n", keys[i]); 566 goto error; 567 } 568 rte_free(hash_data[i]); 569 hash_data[i] = NULL; 570 } 571 572 cycles = rte_rdtsc_precise() - begin; 573 rte_atomic64_add(&check_cycles, cycles); 574 rte_atomic64_add(&checks, i); 575 576 writer_done = 1; 577 /* Wait and check return value from reader threads */ 578 for (i = 0; i < num_cores; i++) 579 if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) 580 goto error; 581 rte_hash_free(h); 582 rte_free(keys); 583 584 printf("Following numbers include calls to rte_hash functions\n"); 585 printf("Cycles per 1 quiescent state update(online/update/offline): %"PRIi64"\n", 586 rte_atomic64_read(&update_cycles) / 587 rte_atomic64_read(&updates)); 588 589 printf("Cycles per 1 check(start, check): %"PRIi64"\n\n", 590 rte_atomic64_read(&check_cycles) / 591 rte_atomic64_read(&checks)); 592 593 rte_free(t[0]); 594 595 return 0; 596 597 error: 598 writer_done = 1; 599 /* Wait until all readers have exited */ 600 rte_eal_mp_wait_lcore(); 601 602 rte_hash_free(h); 603 rte_free(keys); 604 for (i = 0; i < TOTAL_ENTRY; i++) 605 rte_free(hash_data[i]); 606 607 rte_free(t[0]); 608 609 return -1; 610 } 611 612 static int 613 test_rcu_qsbr_main(void) 614 { 615 uint16_t core_id; 616 617 if (rte_lcore_count() < 3) { 618 printf("Not enough cores for rcu_qsbr_perf_autotest, expecting at least 3\n"); 619 return TEST_SKIPPED; 620 } 621 622 rte_atomic64_init(&updates); 623 rte_atomic64_init(&update_cycles); 624 rte_atomic64_init(&checks); 625 rte_atomic64_init(&check_cycles); 626 627 num_cores = 0; 628 RTE_LCORE_FOREACH_WORKER(core_id) { 629 enabled_core_ids[num_cores] = core_id; 630 num_cores++; 631 } 632 633 printf("Number of cores provided = %d\n", num_cores); 634 printf("Perf test with all reader threads registered\n"); 635 printf("--------------------------------------------\n"); 636 all_registered = 1; 637 638 if (test_rcu_qsbr_perf() < 0) 639 goto test_fail; 640 641 if (test_rcu_qsbr_rperf() < 0) 642 goto test_fail; 643 644 if (test_rcu_qsbr_wperf() < 0) 645 goto test_fail; 646 647 if (test_rcu_qsbr_sw_sv_1qs() < 0) 648 goto test_fail; 649 650 if (test_rcu_qsbr_sw_sv_1qs_non_blocking() < 0) 651 goto test_fail; 652 653 /* Make sure the actual number of cores provided is less than 654 * RTE_MAX_LCORE. This will allow for some threads not 655 * to be registered on the QS variable. 656 */ 657 if (num_cores >= RTE_MAX_LCORE) { 658 printf("Test failed! number of cores provided should be less than %d\n", 659 RTE_MAX_LCORE); 660 goto test_fail; 661 } 662 663 printf("Perf test with some of reader threads registered\n"); 664 printf("------------------------------------------------\n"); 665 all_registered = 0; 666 667 if (test_rcu_qsbr_perf() < 0) 668 goto test_fail; 669 670 if (test_rcu_qsbr_rperf() < 0) 671 goto test_fail; 672 673 if (test_rcu_qsbr_wperf() < 0) 674 goto test_fail; 675 676 if (test_rcu_qsbr_sw_sv_1qs() < 0) 677 goto test_fail; 678 679 if (test_rcu_qsbr_sw_sv_1qs_non_blocking() < 0) 680 goto test_fail; 681 682 printf("\n"); 683 684 return 0; 685 686 test_fail: 687 return -1; 688 } 689 690 REGISTER_TEST_COMMAND(rcu_qsbr_perf_autotest, test_rcu_qsbr_main); 691